# Read the annotated twoafternoon.trtlive DEG table from ../output.
twoafternoon.trtlive.DEGs.all.v3.0anno <- read_csv(
  file = file.path("..", "output", "twoafternoon.trtlive.DEGs.all.v3.0anno.csv")
)
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
# Read the annotated twoafternoon.any.trtlive DEG table from ../output.
twoafternoon.any.trtlive.DEGs.all.v3.0anno <- read_csv(
  file = file.path("..", "output", "twoafternoon.any.trtlive.DEGs.all.v3.0anno.csv")
)
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.soil_trtSBC_OLD = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day03 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day04 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day06 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day08 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day10 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day13 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day14 = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
# Read the annotated diurnal34 time-effect DEG table from ../output.
diurnal34.time.DEGs.all.v3.0anno <- read_csv(
  file = file.path("..", "output", "dge.diurnal34.time.DEGs.all.v3.0anno.csv")
)
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.sampling_time2_afternoon = col_double(),
## logFC.sampling_time3_evening_5.30 = col_double(),
## logFC.sampling_time4_night_1 = col_double(),
## logFC.sampling_time5_night_2 = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
# Read the annotated diurnal1314 time-effect DEG table from ../output.
diurnal1314.time.DEGs.all.v3.0anno <- read_csv(
  file = file.path("..", "output", "dge.diurnal1314.time.DEGs.all.v3.0anno.csv")
)
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.sampling_time2_afternoon = col_double(),
## logFC.sampling_time3_evening_5.30 = col_double(),
## logFC.sampling_time4_night_1 = col_double(),
## logFC.sampling_time5_night_2 = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
# check
# Dimensions of the diurnal1314 table after keeping rows with FDR < 0.05.
dim(filter(diurnal1314.time.DEGs.all.v3.0anno, FDR < 0.05)) # [1] 12080 13
## [1] 12080 13
# select genes with higher CV
## classic way
# Row-wise coefficient of variation, in percent: 100 * sd / mean per row.
# x: a table whose columns are all numeric. Returns one value per row of x.
co.var.df <- function(x) {
  row.sd <- apply(x, 1, sd)
  row.mean <- rowMeans(x)
  100 * row.sd / row.mean
}
# Add a `cv` column computed from every column except the first one.
cpm.timecourse.v3.0[["cv"]] <- co.var.df(cpm.timecourse.v3.0[, -1])
# tidyverse way (not working)
#cpm.timecourse.v3.0 %>% slice(1:100) %>% select(-1) %>% group_by(%>% mutate(cv=map(.,co.var.df ))
# Draw the CV histogram and keep the returned histogram object for inspection.
a <- hist(cpm.timecourse.v3.0$cv)
print(a)
## $breaks
## [1] 0 50 100 150 200 250 300 350 400 450 500 550 600 650 700
##
## $counts
## [1] 19889 5976 989 270 87 45 24 12 7 4 2 1
## [13] 1 1
##
## $density
## [1] 1.456643e-02 4.376739e-03 7.243299e-04 1.977443e-04 6.371759e-05
## [6] 3.295738e-05 1.757727e-05 8.788633e-06 5.126703e-06 2.929544e-06
## [11] 1.464772e-06 7.323861e-07 7.323861e-07 7.323861e-07
##
## $mids
## [1] 25 75 125 175 225 275 325 375 425 475 525 575 625 675
##
## $xname
## [1] "cpm.timecourse.v3.0$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# there are genes with extreme values
# Show the genes whose CV exceeds 600.
filter(cpm.timecourse.v3.0, cv > 600)
# Check expression pattern of the first 20 genes with CV above 450.
p <- expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(
  target.genes = cpm.timecourse.v3.0 %>%
    dplyr::filter(cv > 450) %>%
    dplyr::slice(1:20)
)
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
p
# ggsave() is given no plot, so it writes the most recently displayed one.
ggsave(
  filename = "../output/highCV.absvalue.genes.expression.png",
  width = 11,
  height = 8
) # should I remove them????
#
# Fraction of rows whose CV exceeds 30, then 40.
sum(cpm.timecourse.v3.0$cv > 30) / nrow(cpm.timecourse.v3.0) # [1] 0.5207265
## [1] 0.5207265
sum(cpm.timecourse.v3.0$cv > 40) / nrow(cpm.timecourse.v3.0) # [1] 0.3725282. Larger CV than SAS timecourse data ()??? Due to non log absolute expression value.
## [1] 0.3725282
# cf. sum(as.integer(SAS.expression.vst.s.kazu$cv>4.5))/dim(SAS.expression.vst.s.kazu)[1] #[1] 0.2300789
# Same CV calculation on the log-scale table (all columns except the first).
# NOTE(review): the histogram printed below spans about -300000 to 140000,
# which is what sd / mean produces when a row mean is close to zero; this
# column is probably not usable as a variability measure -- confirm.
cpm.timecourse.v3.0.log[["cv"]] <- co.var.df(cpm.timecourse.v3.0.log[, -1])
b <- hist(cpm.timecourse.v3.0.log$cv)
print(b)
## $breaks
## [1] -300000 -280000 -260000 -240000 -220000 -200000 -180000 -160000 -140000
## [10] -120000 -100000 -80000 -60000 -40000 -20000 0 20000 40000
## [19] 60000 80000 100000 120000 140000
##
## $counts
## [1] 1 0 0 1 1 0 0 0 1 0 1 1
## [13] 1 7 1648 25634 7 1 1 2 0 1
##
## $density
## [1] 1.830965e-09 0.000000e+00 0.000000e+00 1.830965e-09 1.830965e-09
## [6] 0.000000e+00 0.000000e+00 0.000000e+00 1.830965e-09 0.000000e+00
## [11] 1.830965e-09 1.830965e-09 1.830965e-09 1.281676e-08 3.017431e-06
## [16] 4.693496e-05 1.281676e-08 1.830965e-09 1.830965e-09 3.661931e-09
## [21] 0.000000e+00 1.830965e-09
##
## $mids
## [1] -290000 -270000 -250000 -230000 -210000 -190000 -170000 -150000 -130000
## [10] -110000 -90000 -70000 -50000 -30000 -10000 10000 30000 50000
## [19] 70000 90000 110000 130000
##
## $xname
## [1] "cpm.timecourse.v3.0.log$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# use largeCV
# Keep the log-scale rows of transcripts whose CV in the non-log table is > 40.
# Rows are matched on transcript_ID explicitly. Indexing the rows of
# cpm.timecourse.v3.0.log with the ID column itself only works when that table
# carries the IDs as row names, and silently selects by integer code when the
# IDs are a factor.
# which() drops rows whose cv is NA instead of propagating NA into the index.
largeCV.ids <- as.character(
  cpm.timecourse.v3.0$transcript_ID[which(cpm.timecourse.v3.0$cv > 40)]
)
cpm.timecourse.v3.0.log.largeCV <- cpm.timecourse.v3.0.log[
  as.character(cpm.timecourse.v3.0.log$transcript_ID) %in% largeCV.ids,
]
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262 289 > [1] 10173 290 (02/01/2020) (cf. SAS.expression.vst.s.kazu.largeCV is 7025 288)
## [1] 10173 290
# NOTE(review): the object is named `c`, which shadows base::c as a variable;
# the name is kept so that any later use of `c` is unaffected.
c <- hist(cpm.timecourse.v3.0.log.largeCV$cv)
c
## $breaks
## [1] -30000 -25000 -20000 -15000 -10000 -5000 0 5000 10000 15000
## [11] 20000 25000 30000 35000
##
## $counts
## [1] 1 0 3 2 6 348 5692 6 4 1 1 0 1
##
## $density
## [1] 3.297609e-08 0.000000e+00 9.892828e-08 6.595218e-08 1.978566e-07
## [6] 1.147568e-05 1.876999e-04 1.978566e-07 1.319044e-07 3.297609e-08
## [11] 3.297609e-08 0.000000e+00 3.297609e-08
##
## $mids
## [1] -27500 -22500 -17500 -12500 -7500 -2500 2500 7500 12500 17500
## [11] 22500 27500 32500
##
## $xname
## [1] "cpm.timecourse.v3.0.log.largeCV$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
###########
#save(cpm.timecourse.v3.0.log.largeCV,file=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
# Write the large-CV log-expression table as a gzip-compressed CSV.
write_csv(
  cpm.timecourse.v3.0.log.largeCV,
  path = file.path("..", "output", "cpm.timecourse.v3.0.log.largeCV.csv.gz")
)
# The following setting is important, do not omit.
library(WGCNA) # errors in installing WGCNA on my computer at impute package installation (Jan 27, 2020). Use Whitney
options(stringsAsFactors = FALSE)
# Pick the WGCNA thread count from the host name; other hosts are left alone.
switch(Sys.info()[["nodename"]],
  "whitney" = enableWGCNAThreads(10), # in Whitney (Maloof lab server)
  "Kazu-MBP.plb.ucdavis.edu" = enableWGCNAThreads(2) # in my computer
)
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
# For some reason, on Whitney the library columns were read as character. This needs to be fixed.
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"),
# col_types=list(col_character(),col_double())) # error
cpm.timecourse.v3.0.log.largeCV <- read.csv(file.path("..", "output", "cpm.timecourse.v3.0.log.largeCV.csv.gz")) # using classic read.csv in Whitney
#load(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
#
# Build the samples-in-rows matrix for WGCNA from the expression columns only.
# The saved table also carries the `cv` column added before subsetting; dropping
# just the first (ID) column would feed `cv` into the network as if it were a
# sample.
expr.cols <- names(cpm.timecourse.v3.0.log.largeCV)[-1]
expr.cols <- expr.cols[expr.cols != "cv"]
datExpr <- t(cpm.timecourse.v3.0.log.largeCV[, expr.cols, drop = FALSE])
# Choose a set of soft-thresholding powers
# Candidate soft-thresholding powers: 1-10, then 12-20 in steps of 2.
# The previous seq(from = 2, to = 20, by = 10) evaluated to c(2, 12), which
# repeated power 2 and never tested 10 or 14-20.
powers <- c(1:10, seq(from = 12, to = 20, by = 2))
sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
#sizeGrWindow(9, 5)
# Two-panel PDF built from columns 1, 2, 3 and 5 of sft$fitIndices.
pdf("../output/largeCV.softthresholding.pdf", width = 10, height = 8)
par(mfrow = c(1, 2))
cex1 <- 0.9
fit.tbl <- sft$fitIndices
# Column 2 with its sign flipped according to column 3; used as the y value.
signed.r2 <- -sign(fit.tbl[, 3]) * fit.tbl[, 2]
# Scale-free topology fit index as a function of the soft-thresholding power
plot(
  fit.tbl[, 1], signed.r2,
  xlab = "Soft Threshold (power)",
  ylab = "Scale Free Topology Model Fit,signed R^2",
  type = "n",
  main = "Scale independence"
)
text(fit.tbl[, 1], signed.r2, labels = powers, cex = cex1, col = "red")
# this line corresponds to using an R^2 cut-off of h
abline(h = 0.90, col = "red")
# Mean connectivity as a function of the soft-thresholding power
plot(
  fit.tbl[, 1], fit.tbl[, 5],
  xlab = "Soft Threshold (power)",
  ylab = "Mean Connectivity",
  type = "n",
  main = "Mean connectivity"
)
text(fit.tbl[, 1], fit.tbl[, 5], labels = powers, cex = cex1, col = "red")
dev.off()
#
# Build the network and detect modules with soft-thresholding power 9.
# saveTOMs = TRUE writes TOM files using the given file base, which has no
# directory component.
net <- blockwiseModules(
  datExpr,
  power = 9,
  TOMType = "unsigned",
  minModuleSize = 20,
  reassignThreshold = 0,
  mergeCutHeight = 0.25,
  numericLabels = TRUE,
  pamRespectsDendro = FALSE,
  saveTOMs = TRUE,
  saveTOMFileBase = "cpm.timecourse.v3.0.log.largeCV.TOM",
  verbose = 3
)
save(net, file = "../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")
# open a graphics window
pdf(file = "../output/largeCV.dendrogram.pdf", width = 10, height = 8)
# Convert labels to colors for plotting
mergedColors <- labels2colors(net$colors)
# Plot the dendrogram of the first block with its module colors underneath
plotDendroAndColors(
  net$dendrograms[[1]],
  mergedColors[net$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05
)
dev.off()
# save parameters
# Pull the pieces of `net` that later steps load back from all.largeCV.RData.
moduleLabels <- net$colors
moduleColors <- labels2colors(net$colors)
MEs <- net$MEs
geneTree <- net$dendrograms[[1]]
save(
  MEs, moduleLabels, moduleColors, geneTree,
  file = "../output/all.largeCV.RData"
)
# Reload the expression table and the saved network objects.
cpm.timecourse.v3.0.log.largeCV <- read.csv(
  file.path("..", "output", "cpm.timecourse.v3.0.log.largeCV.csv.gz")
)
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262 289 -> [1] 10173 290 (Feb 01, 2020)
## [1] 10173 290
load("../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")
load("../output/all.largeCV.RData")
# how many modules?
# The count below is the number of distinct labels, including label 0.
# NOTE(review): WGCNA uses label 0 for unassigned genes, so this is presumably
# 6 modules plus the unassigned set -- confirm.
table(net$colors)
length(table(net$colors)) # 7 labels
##
## 0 1 2 3 4 5 6
## 4968 4723 174 126 79 72 31
## [1] 7
# Pair each transcript ID with its module colour, by position.
cpm.timecourse.v3.0.log.largeCV.modules <- tibble(
  transcript_ID = cpm.timecourse.v3.0.log.largeCV[["transcript_ID"]],
  modules = moduleColors
)
#cpm.timecourse.v3.0.log.largeCV.modules.list<-list(transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,modules=moduleColors)
## prep
# annotation file for v3.0annotation
# Read the v3.0 annotation table from the Annotation_copy output directory.
Br.v3.0.At.BLAST <- read_csv(
  file.path(
    "..", "Annotation_copy", "output", "v3.0annotation",
    "Brapa_v3.0_annotated.csv"
  )
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## name = col_character(),
## chrom = col_character(),
## subject = col_character(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_full_name = col_character(),
## At_gene_model_type = col_character(),
## At_short_description = col_character(),
## At_Curator_summary = col_character(),
## At_Computational_description = col_character()
## )
## See spec(...) for full column specifications.
# This annotation has redundant rows per name (Br gene), e.g.
Br.v3.0.At.BLAST %>% filter(name == "BraA01g040570.3C")
# reduce the redundancy (112418)
# Keep, for every `name`, the row with the highest `score`.
# desc is namespace-qualified because IRanges masks it once loaded.
# ungroup() stops the result from staying grouped by `name`.
Br.v3.0anno.At.BLAST.highscore <- Br.v3.0.At.BLAST %>%
  group_by(name) %>%
  arrange(dplyr::desc(score)) %>%
  dplyr::slice(1) %>%
  ungroup()
# function for adding annotation
## get object name https://stackoverflow.com/questions/14577412/how-to-convert-variable-object-name-into-string
# Return the expression supplied as `v1`, unevaluated, as a character string.
myfunc <- function(v1) {
  arg.expr <- substitute(v1)
  deparse(arg.expr)
}
myfunc(foo)
## [1] "foo"
# adding annotation and write_csv adding ".v3.0anno.csv" to the object name.
# Add annotation columns to a table whose row names are gene IDs, write the
# result to ../output/<object name>.v3.0anno.csv and return it.
#   DGE: a table with gene IDs as row names; the expression passed here also
#        provides the output file name.
# Reads Br.v3.0anno.At.BLAST.highscore from the calling environment.
addAnno <- function(DGE) {
  # Capture the caller's expression once. deparse() can return several strings
  # for a long expression, so collapse to a single file-name stem.
  dge.name <- paste(deparse(substitute(DGE)), collapse = "")
  temp <- left_join(
    DGE %>% rownames_to_column(var = "genes"),
    Br.v3.0anno.At.BLAST.highscore,
    by = c(genes = "name")
  ) %>%
    dplyr::select(genes, names(DGE), AGI, At_symbol, At_short_description, perc_ID)
  print(dge.name)
  write_csv(
    temp,
    path = file.path("..", "output", paste0(dge.name, ".v3.0anno.csv"))
  )
  return(temp)
}
#Br.v3.0anno.At.BLAST.highscore.list<-list()
# Build the gene -> module list used as custom categories: one element per
# gene that has a module assignment, named by transcript ID and holding that
# gene's module colour(s). This replaces a per-gene loop that filtered the
# module table four times and printed diagnostics on every iteration.
cdna.ids <- names(Bra.v3.0_cdna)
module.tbl <- cpm.timecourse.v3.0.log.largeCV.modules
# How many cDNA entries have a module assignment (summary, not every flag).
table(cdna.ids %in% module.tbl$transcript_ID)
in.cdna <- module.tbl$transcript_ID %in% cdna.ids
modules.by.gene <- split(
  as.character(module.tbl$modules[in.cdna]),
  as.character(module.tbl$transcript_ID[in.cdna])
)
# Keep the gene order of the cDNA set; genes without a module are left out.
Bra.v3.0_cdna.list <- modules.by.gene[cdna.ids[cdna.ids %in% names(modules.by.gene)]]
# check: no NULL entries remain
table(vapply(Bra.v3.0_cdna.list, is.null, logical(1)))
save(Bra.v3.0_cdna.list, file = "../output/Bra.v3.0_cdna.list.Rdata")
######### Did not work
# cpm.timecourse.v3.0.log.largeCV.modules %>% nest(transcript_ID) # this is not what I want
# library(purrr)
#cpm.timecourse.v3.0.log.largeCV.modules %>% purrr::transpose()
# Load the saved Bra.v3.0_cdna.list (gene -> module list); it is the default
# custom.category.list of GOseq.customcategory.ORA() below.
load("../output/Bra.v3.0_cdna.list.Rdata")
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:glue':
##
## collapse, trim
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: XVector
##
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: Rsamtools
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
##
## Attaching package: 'GenomicAlignments'
## The following object is masked from 'package:dplyr':
##
## last
##
## Attaching package: 'ShortRead'
## The following object is masked from 'package:dplyr':
##
## id
## The following object is masked from 'package:purrr':
##
## compose
## The following object is masked from 'package:tibble':
##
## view
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
## Attaching package: 'geneLenDataBase'
## The following object is masked from 'package:S4Vectors':
##
## unfactor
##
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
# for ggplot heatmap
## uncompress gz file
## read cDNA fasta file
# The gzip-compressed CDS file is read directly: readDNAStringSet() accepts
# gzip input, so no uncompressed copy has to be created and removed through
# shell commands with unquoted paths.
Bra.v3.0_cdna <- readDNAStringSet(file.path("..", "Annotation_copy", "input", "v3.0annotation", "Brapa_genome_v3.0_cds.gz")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
## A DNAStringSet instance of length 46250
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## ... ... ...
## [46246] 162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247] 1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248] 1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249] 870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250] 1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
# special function for GOseq
# Over-representation test of a gene list against custom categories with goseq.
#   genelist:             IDs of the genes of interest.
#   padjust:              cut-off on the BH-adjusted over-representation p-value.
#   custom.category.list: named list passed to goseq() as gene2cat.
#   Br_cdna:              sequence set; names give the gene universe and
#                         nchar() gives the length bias.
# Returns the rows of the goseq() result whose adjusted p-value is below
# `padjust`, or the string "no enriched GO" when there are none.
GOseq.customcategory.ORA <- function(genelist, padjust = 0.05, custom.category.list = Bra.v3.0_cdna.list, Br_cdna = Bra.v3.0_cdna) {
  bias <- nchar(Br_cdna)
  names(bias) <- names(Br_cdna)
  # 1 for genes in the list, 0 otherwise, over the whole gene universe.
  TF <- (names(bias) %in% genelist) * 1
  names(TF) <- names(bias)
  pwf <- nullp(TF, bias.data = bias)
  GO.pval <- goseq(pwf, gene2cat = custom.category.list, use_genes_without_cat = TRUE) # format became different in new goseq version (021111). Does not work (042716)
  #GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
  GO.pval$over_represented_padjust <- p.adjust(GO.pval$over_represented_pvalue, method = "BH")
  # Test every row instead of only the first one, so the result depends
  # neither on the row order nor on the first value being non-missing.
  is.enriched <- !is.na(GO.pval$over_represented_padjust) &
    GO.pval$over_represented_padjust < padjust
  if (!any(is.enriched)) {
    return("no enriched GO")
  }
  enriched.GO <- GO.pval[is.enriched, ]
  print("enriched.GO is")
  print(enriched.GO)
  return(enriched.GO)
}
# Gene IDs with FDR < 0.05, split by the sign of logFC.
gene.up <- twoafternoon.trtlive.DEGs.all.v3.0anno %>%
  filter(logFC > 0 & FDR < 0.05) %>%
  dplyr::select(genes) %>%
  as_vector()
gene.down <- twoafternoon.trtlive.DEGs.all.v3.0anno %>%
  filter(logFC < 0 & FDR < 0.05) %>%
  dplyr::select(genes) %>%
  as_vector()
# Module enrichment of each list against the default custom categories.
enriched.GO.up <- GOseq.customcategory.ORA(genelist = gene.up) # needs to wait for Bra.v3.0_cdna.list.Rdata ready in Whitney
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 13 lightyellow 5.053465e-08 1.0000000 7
## 21 tan 1.651936e-04 0.9999827 6
## numInCat over_represented_padjust
## 13 32 1.162297e-06
## 21 73 1.899727e-03
enriched.GO.down <- GOseq.customcategory.ORA(genelist = gene.down)
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 23 yellow 4.518946e-176 1.0000000 149
## 16 pink 2.553492e-32 1.0000000 40
## 12 lightgreen 1.895763e-05 0.9999984 7
## 17 purple 1.741131e-03 0.9995694 9
## numInCat over_represented_padjust
## 23 242 1.039358e-174
## 16 128 2.936516e-31
## 12 33 1.453418e-04
## 17 101 1.001150e-02
# Up-regulated genes that belong to the n-th enriched module.
n <- 1
gene.up.category <- cpm.timecourse.v3.0.log.largeCV.modules %>%
  filter(transcript_ID %in% gene.up, modules == enriched.GO.up$category[n])
# head() returns at most 10 rows. Indexing with [1:10, ] asks for rows that do
# not exist when the module holds fewer than 10 of these genes (the first
# enriched module above has numDEInCat = 7).
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = head(gene.up.category, 10))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Error: Faceting variables must have at least one value
# expression pattern of module/genes of interest (normalized value)
# scaling expression data
# Row-wise scaling of the expression columns only. The `cv` column added to
# cpm.timecourse.v3.0 earlier is not a sample; it is left out of the scaling
# and re-attached unchanged as the last column, so code that expects a `cv`
# column in the scaled table still finds one.
scale.input <- cpm.timecourse.v3.0[, -1]
scale.input <- scale.input[, names(scale.input) != "cv", drop = FALSE]
cpm.timecourse.v3.0.scale <- t(scale(t(scale.input))) %>%
  as_tibble() %>%
  bind_cols(data.frame(transcript_ID = cpm.timecourse.v3.0$transcript_ID[]), .)
cpm.timecourse.v3.0.scale$cv <- cpm.timecourse.v3.0$cv
gene.up.category <- cpm.timecourse.v3.0.log.largeCV.modules %>%
  filter(transcript_ID %in% gene.up, modules == enriched.GO.up$category[n])
# Down-regulated genes are matched against the module enriched in the DOWN
# list (this previously used enriched.GO.up).
gene.down.category <- cpm.timecourse.v3.0.log.largeCV.modules %>%
  filter(transcript_ID %in% gene.down, modules == enriched.GO.down$category[n])
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(data = cpm.timecourse.v3.0.scale, target.genes = gene.up.category[1, ])
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Error: Faceting variables must have at least one value
# Two ways of tabulating the plotting calls: one row per call.
# head(x, 10) keeps at most 10 genes and never pads with all-NA rows the way
# x[1:10, ] does when x has fewer than 10 rows.
input <- tribble(
  ~target.genes, ~data, ~f,
  head(gene.up.category, 10), cpm.timecourse.v3.0.scale, expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,
  head(gene.down.category, 10), cpm.timecourse.v3.0.scale, expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2
)
input2 <- tribble(
  ~f, ~param,
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2, list(target.genes = head(gene.up.category, 10), data = cpm.timecourse.v3.0.scale),
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2, list(target.genes = head(gene.down.category, 10), data = cpm.timecourse.v3.0.scale)
)
# Call each f with its own parameter list and store the result per row.
test <- input2 %>% mutate(output = invoke_map(f, param)) # works, but parameters are not visible
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# how about to use map2?
## an example
# Example: every row of `params` supplies the arguments of one rnorm() call.
params <- tribble(
  ~mean, ~sd, ~n,
  5, 1, 1,
  10, 5, 3,
  -3, 10, 5
)
pmap(params, rnorm)
## [[1]]
## [1] 4.401924
##
## [[2]]
## [1] 13.629514 8.742931 10.435519
##
## [[3]]
## [1] -19.4951056 -21.3628010 -17.8922653 0.3945985 4.3824799
#
# One row per plot; the column names match the arguments passed to the
# plotting function by pmap().
# head(x, 10) keeps at most 10 genes and never pads with all-NA rows the way
# x[1:10, ] does when x has fewer than 10 rows.
input3 <- tribble(
  ~target.genes, ~data, ~title,
  head(gene.up.category, 10), cpm.timecourse.v3.0.scale, "2-afternoon soil up",
  head(gene.down.category, 10), cpm.timecourse.v3.0.scale, "2-afternoon soil down",
)
#input3 %>% pmap(expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2) -> expression.pattern
#
expression.pattern <- input3 %>%
  mutate(plot = pmap(., expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
expression.pattern$plot[1] # plot
## [[1]]
## Error: Faceting variables must have at least one value
#input3 %>% mutate(plot=invoke_map(~expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2)) # errors
# Trial run on the first rows only: long format, values back-transformed with
# 2^value, sample description attached, then split by soil_trt.
temp.abs <- cpm.timecourse.v3.0.log %>%
  head() %>%
  gather(sample, value, -transcript_ID) %>%
  mutate(abs.value = 2^value) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  split(.$soil_trt)
# Mean of abs.value for every group x transcript_ID combination.
mean.by.soil <- function(x) {
  x %>%
    group_by(group, transcript_ID) %>%
    summarize(mean = mean(abs.value))
}
temp.abs.mean <- map(temp.abs, mean.by.soil)
# Ratio of SBC_OLD to ATM_BLANK means; the two summaries are paired by row
# position.
# NOTE(review): log() is the natural logarithm although the values were
# back-transformed with 2^value -- confirm whether log2() was intended.
cpm.timecourse.v3.0.logFC <- tibble(
  transcript_ID = temp.abs.mean[["SBC_OLD"]]$transcript_ID,
  group = temp.abs.mean[["SBC_OLD"]]$group,
  logFC = log(temp.abs.mean[["SBC_OLD"]]$mean / temp.abs.mean[["ATM_BLANK"]]$mean)
) %>%
  left_join(
    sample.description.timecourse %>%
      dplyr::select("group", "sampling_day", "sampling_time"),
    by = "group"
  )
# check logFC range
range(cpm.timecourse.v3.0.logFC$logFC) #(Jan 31, 2020)
# Same steps on the whole table: long format, 2^value, sample description,
# split by soil_trt.
temp.abs <- cpm.timecourse.v3.0.log %>%
  gather(sample, value, -transcript_ID) %>%
  mutate(abs.value = 2^value) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  split(.$soil_trt)
# mean of absolute value function
mean.by.soil <- function(x) {
  x %>%
    group_by(group, transcript_ID) %>%
    summarize(mean = mean(abs.value))
}
# calculating absolute value mean
temp.abs.mean <- map(temp.abs, mean.by.soil)
# making summary tibble: one row per distinct group / day / time combination
temp2 <- sample.description.timecourse %>%
  dplyr::select("group", "sampling_day", "sampling_time")
sample.description.timecourse.logFC <- temp2[!duplicated(temp2), ]
# add sample info
# The two summaries are paired by row position.
# NOTE(review): log() is the natural logarithm although the values were
# back-transformed with 2^value -- confirm whether log2() was intended.
cpm.timecourse.v3.0.logFC <- tibble(
  transcript_ID = temp.abs.mean[["SBC_OLD"]]$transcript_ID,
  group = temp.abs.mean[["SBC_OLD"]]$group,
  logFC = log(temp.abs.mean[["SBC_OLD"]]$mean / temp.abs.mean[["ATM_BLANK"]]$mean)
) %>%
  left_join(sample.description.timecourse.logFC, by = "group")
# check
dim(cpm.timecourse.v3.0.logFC)
# check frequency distribution
a <- hist(cpm.timecourse.v3.0.logFC$logFC) # most of them are small
print(a)
# what are genes with super high logFC?
# Transcript IDs of rows with |logFC| above 5, then a plot of the first ten.
high.FC.genes <- cpm.timecourse.v3.0.logFC %>%
  filter(abs(logFC) > 5) %>%
  dplyr::select(transcript_ID)
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(
  target.genes = high.FC.genes[1:10, ]
)
#
# Add annotation columns to a table that has a transcript_ID column, write the
# result to ../output/<object name>.v3.0anno.csv and return it.
#   DGE: a table with a transcript_ID column; the expression passed here also
#        provides the output file name.
# Reads Br.v3.0anno.At.BLAST.highscore from the calling environment.
addAnno2 <- function(DGE) {
  # Capture the caller's expression once. deparse() can return several strings
  # for a long expression, so collapse to a single file-name stem.
  dge.name <- paste(deparse(substitute(DGE)), collapse = "")
  temp <- left_join(
    DGE,
    Br.v3.0anno.At.BLAST.highscore,
    by = c("transcript_ID" = "name")
  ) %>%
    dplyr::select(transcript_ID, names(DGE), AGI, At_symbol, At_short_description, perc_ID)
  print(dge.name)
  write_csv(
    temp,
    path = file.path("..", "output", paste0(dge.name, ".v3.0anno.csv"))
  )
  return(temp)
}
#
addAnno2(high.FC.genes)
#
dim(cpm.timecourse.v3.0.logFC) #[1] 1110000 5
#write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv") # too large (306 M)
# Write the logFC table gzip-compressed, then read it back from that file.
write_csv(
  cpm.timecourse.v3.0.logFC,
  path = "../output/cpm.timecourse.v3.0.logFC.csv.gz"
) # 12.3 M
cpm.timecourse.v3.0.logFC <- read_csv(
  "../output/cpm.timecourse.v3.0.logFC.csv.gz"
)
## Parsed with column specification:
## cols(
## transcript_ID = col_character(),
## group = col_character(),
## logFC = col_double(),
## sampling_day = col_character(),
## sampling_time = col_character()
## )
# gene.up is defined outside this section.
target.genes<-gene.up
# The function below is commented out and kept for reference only: it filters
# `data` to `target.genes` and, for subset.data == "only_two_afternoon", draws a
# boxplot of logFC by sampling_day.
# NOTE(review): the test call at the end of this block needs an active definition
# of this function; presumably one exists elsewhere in the file -- confirm.
# expression.pattern.Br.graph.timecourse.v3.0annotation.logFC<-function(data=cpm.timecourse.v3.0.logFC,target.genes,title="",subset.data="only_two_afternoon"){
# #print(paste("data is",data[1:10,]))
# #print(paste("tissue.type is root"))
# data[is.na(data)] <- 0 #
# data.temp<-data %>% dplyr::filter(transcript_ID %in% target.genes)
#
# # if (2-afternoon=TRUE)
# if (subset.data=="only_two_afternoon") {
# p<-data.temp %>% ggplot(aes(x=sampling_day,y=logFC)) +
# geom_boxplot(alpha = 0.5) +
# theme_bw() +
# theme(strip.text.y=element_text(angle=0),axis.text.x=element_text(angle=90)) +
# theme(legend.position="bottom") + labs(title=title)
# p
# } else {print("Define subset.data other than only_two_afternoon.")}
# }
# test the function
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")
# 2_afternoon DEG expression data (scaled)
# Keep the transcripts with FDR < 0.05 in twoafternoon.trtlive.DEGs.all.v3.0anno,
# reshape to long form, attach the sample metadata and keep only the samples
# taken at sampling_time "2_afternoon".
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- cpm.timecourse.v3.0.scale %>%
  dplyr::select(-cv) %>%
  inner_join(
    twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR < 0.05) %>% dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_time == "2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread: one row per transcript, one column per sample.
# (The previous call passed a stray `-1` as spread()'s third positional
# argument, which is `fill`; any missing transcript/sample combination would
# silently have become -1 instead of NA.)
cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread <- cpm.timecourse.v3.0.scale.twoafternoon.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread) # [1] 1442 97
## [1] 1442 97
# calculate wss (within-groups sum of squares) for k = 1..20 for an elbow plot.
# Seeded so that the randomly initialised k-means runs are reproducible.
set.seed(20)
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1], 2, var))
for (i in 2:20) {
  # The default iter.max = 10 gave "did not converge in 10 iterations", so allow 20.
  # Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
  wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1],
                       centers = i, iter.max = 20)$withinss)
}
plot(1:20, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")
Let’s perform the actual clustering using K=8:
# K-means with 8 centers on the scaled 2_afternoon DEG matrix (first column,
# transcript_ID, dropped); seeded, 1000 random starts, at most 20 iterations each.
set.seed(20)
kClust.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1],
                   centers = 8, nstart = 1000, iter.max = 20)
kClusters.8 <- kClust.8$cluster
# number of rows (transcripts) assigned to each cluster.
# n() counts the members; the previous sum(cluster) added up the cluster ids
# instead (cluster 3 with 10 members was reported as 30).
cluster.8.num <- tibble(cluster = kClusters.8) %>%
  group_by(cluster) %>%
  summarize(n = n())
cluster.8.num$cluster <- as.character(cluster.8.num$cluster) # classic way
Now we can calculate the cluster ‘cores’ (aka centroids):
# function to find centroid in cluster i
# Centroid of one cluster: the column means of the rows of `dat` whose entry in
# `clusters` equals `i`.
#
# i:        cluster label (compared with `==`, so "1" matches 1)
# dat:      matrix / data frame / tibble, one row per clustered item
# clusters: vector of cluster labels, one per row of `dat`
# Returns a named numeric vector with one mean per column of `dat`.
clust.centroid <- function(i, dat, clusters) {
  ind <- (clusters == i)
  # drop = FALSE keeps two dimensions when the cluster has a single member;
  # without it a one-row matrix collapses to a vector and colMeans() fails.
  colMeans(dat[ind, , drop = FALSE])
}
# Centroid of every cluster: a matrix with one row per sample column and one
# column per cluster label.
kClustcentroids.8 <- sapply(levels(factor(kClusters.8)), clust.centroid,
                            cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1], kClusters.8)
# adding sample description to data
data.sample <- kClustcentroids.8 %>% as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.8.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean: one row per group x cluster.
# distinct() keeps the first row of every group.cluster; this replaces
# slice(rep(1:768)[!duplicated(...)]), which hard-coded the row count
# (8 clusters x 96 samples) of this particular table.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot: centroid value per sample (points) and group mean (line), facetted by
# cluster (rows) and sampling day (columns)
p8 <- ggplot(data.sample, aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters", color = "Cluster", y = "scaled expression level")
p8
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.DEG.Kmean.8clusters.png", plot = p8, width = 11, height = 8)
# K-means with 5 centers on the scaled 2_afternoon DEG matrix (seeded,
# 1000 random starts, at most 20 iterations each).
set.seed(20)
kClust.5 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1],
                   centers = 5, nstart = 1000, iter.max = 20)
kClusters.5 <- kClust.5$cluster
# number of rows (transcripts) assigned to each cluster.
# n() counts the members; the previous sum(cluster) added up the cluster ids.
cluster.5.num <- tibble(cluster = kClusters.5) %>%
  group_by(cluster) %>%
  summarize(n = n())
cluster.5.num$cluster <- as.character(cluster.5.num$cluster) # classic way
kClustcentroids.5 <- sapply(levels(factor(kClusters.5)), clust.centroid,
                            cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1], kClusters.5)
# adding sample description to data
data.sample <- kClustcentroids.5 %>% as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.5.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean: one row per group x cluster.
# distinct() keeps the first row of every group.cluster and replaces the
# hard-coded slice(rep(1:480)[!duplicated(...)]).
# Author's open note on the old slice() line: "only cluster 1... why???"
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot
p5 <- ggplot(data.sample, aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters", color = "Cluster", y = "scaled expression level")
p5
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.DEG.Kmean.5clusters.png", plot = p5, width = 11, height = 8)
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")
# 2_afternoon DEG logFC data
# Keep the logFC rows of the transcripts with FDR < 0.05 in
# twoafternoon.trtlive.DEGs.all.v3.0anno, restricted to sampling_time "2_afternoon".
cpm.timecourse.v3.0.logFC.twoafternoon.DEG <- cpm.timecourse.v3.0.logFC %>%
  inner_join(
    twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR < 0.05) %>% dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  filter(sampling_time == "2_afternoon")
# spread: one row per transcript, one column per group.
# (The previous call passed a stray `-1` as spread()'s third positional
# argument, which is `fill`; any missing transcript/group combination would
# silently have become -1 instead of NA.)
cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread <- cpm.timecourse.v3.0.logFC.twoafternoon.DEG %>%
  dplyr::select(transcript_ID, group, logFC) %>%
  spread(group, logFC)
dim(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread) # [1] 1442 9
## [1] 1442 9
# calculate wss (within-groups sum of squares) for k = 1..20 for an elbow plot.
# Seeded so that the randomly initialised k-means runs are reproducible.
set.seed(20)
wss.logFC <- (nrow(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1], 2, var))
for (i in 2:20) {
  # The default iter.max = 10 gave "did not converge in 10 iterations", so allow 20.
  # Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
  wss.logFC[i] <- sum(kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
                             centers = i, iter.max = 20)$withinss)
}
plot(1:20, wss.logFC, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")
# Let’s perform the actual clustering using K=5:
# K-means on the logFC matrix (seeded, 1000 random starts, at most 20 iterations each).
set.seed(20)
kClust.logFC.5 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
                         centers = 5, nstart = 1000, iter.max = 20)
kClusters.logFC.5 <- kClust.logFC.5$cluster
# number of rows (transcripts) assigned to each cluster.
# n() counts the members; the previous sum(cluster) added up the cluster ids.
# Note: this reuses (overwrites) the name cluster.5.num.
cluster.5.num <- tibble(cluster = kClusters.logFC.5) %>%
  group_by(cluster) %>%
  summarize(n = n())
cluster.5.num$cluster <- as.character(cluster.5.num$cluster) # classic way
kClustcentroids.logFC.5 <- sapply(levels(factor(kClusters.logFC.5)), clust.centroid,
                                  cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1], kClusters.logFC.5)
# making sample.description.timecourse.logFC: one row per distinct
# (group, sampling_day, sampling_time)
temp2 <- sample.description.timecourse %>% dplyr::select("group", "sampling_day", "sampling_time")
sample.description.timecourse.logFC <- temp2[!duplicated(temp2), ]
# plot: centroid logFC per group, one facet row per cluster
p.logFC.5 <- kClustcentroids.logFC.5 %>% as_tibble(rownames = "group") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse.logFC, by = "group") %>%
  inner_join(cluster.5.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n)) %>%
  ggplot(aes(x = sampling_day, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  facet_grid(cluster.n ~ .) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  # The plotted values are centroids of the logFC columns, so the y axis is
  # labelled as logFC (the label had been copied from the scaled-expression plot).
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters", color = "Cluster", y = "logFC")
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.5
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.DEG.logFC.Kmean.5clusters.png", plot = p.logFC.5, width = 11, height = 8)
# K-means with 8 centers on the logFC matrix (seeded, 1000 random starts,
# at most 20 iterations each).
set.seed(20)
kClust.logFC.8 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
                         centers = 8, nstart = 1000, iter.max = 20)
kClusters.logFC.8 <- kClust.logFC.8$cluster
# number of rows (transcripts) assigned to each cluster.
# n() counts the members; the previous sum(cluster) added up the cluster ids.
# Note: this reuses (overwrites) the name cluster.8.num.
cluster.8.num <- tibble(cluster = kClusters.logFC.8) %>%
  group_by(cluster) %>%
  summarize(n = n())
cluster.8.num$cluster <- as.character(cluster.8.num$cluster) # classic way
kClustcentroids.logFC.8 <- sapply(levels(factor(kClusters.logFC.8)), clust.centroid,
                                  cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1], kClusters.logFC.8)
# plot: centroid logFC per group, one facet row per cluster
p.logFC.8 <- kClustcentroids.logFC.8 %>% as_tibble(rownames = "group") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse.logFC, by = "group") %>%
  inner_join(cluster.8.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n)) %>%
  ggplot(aes(x = sampling_day, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  facet_grid(cluster.n ~ .) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  # The plotted values are centroids of the logFC columns, so the y axis is
  # labelled as logFC (the label had been copied from the scaled-expression plot).
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters", color = "Cluster", y = "logFC")
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.8
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.DEG.logFC.Kmean.8clusters.png", plot = p.logFC.8, width = 11, height = 8)
load(file.path("..","Annotation_copy","output","v3.0annotation","Brgo.v3.0anno.Atgoslim.BP.list.Rdata"))
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
# for ggplot heatmap
## read cDNA fasta file
# The CDS FASTA is stored gzip-compressed. Decompress it to a temporary .fa file
# next to the archive, read it, and always remove the temporary copy, even when
# reading fails. Paths are shell-quoted so spaces cannot break the command, and
# the file is removed with unlink() rather than a shelled-out `rm`.
Bra.v3.0_cdna <- local({
  anno.dir <- file.path("..", "Annotation_copy", "input", "v3.0annotation")
  cds.gz <- file.path(anno.dir, "Brapa_genome_v3.0_cds.gz")
  cds.fa <- file.path(anno.dir, "Brapa_genome_v3.0_cds.fa")
  ## uncompress gz file
  status <- system(paste("gunzip -c", shQuote(cds.gz), ">", shQuote(cds.fa)))
  tryCatch({
    if (status != 0) {
      stop("gunzip failed for ", cds.gz, call. = FALSE)
    }
    readDNAStringSet(cds.fa) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
  }, finally = unlink(cds.fa)) ## remove fasta file
})
Bra.v3.0_cdna
## A DNAStringSet instance of length 46250
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## ... ... ...
## [46246] 162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247] 1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248] 1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249] 870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250] 1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
# GOseq function
# GO over-representation analysis with goseq, using sequence length as the bias
# covariate.
#
# genelist:             character vector of gene names to test; matched against
#                       names(Br_cdna)
# padjust:              BH-adjusted p-value cutoff (default 0.05)
# ontology:             "BP" or "CC"; any other value selects "MF"
# custom.category.list: gene-to-category mapping passed to goseq(gene2cat = )
# Br_cdna:              named sequence set; nchar() of each entry is the bias,
#                       and its names define the gene universe
# Returns the rows of the goseq result for the chosen ontology whose BH-adjusted
# over-representation p-value is below `padjust`, with `Term` and `Definition`
# columns added, or the string "no enriched GO" when no category passes.
GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA<-function(genelist,padjust=0.05,ontology="BP",custom.category.list=Brgo.v3.0anno.Atgoslim.BP.list,Br_cdna=Bra.v3.0_cdna) { # return GO enrichment table, padjus, padjust=0.05.
  bias <- nchar(Br_cdna)
  names(bias) <- names(Br_cdna)
  # 1 for genes in `genelist`, 0 for every other gene in the universe
  TF <- (names(bias) %in% genelist) * 1
  names(TF) <- names(bias)
  #print(TF)
  pwf <- nullp(TF, bias.data = bias)
  #print(pwf$DEgenes)
  GO.pval <- goseq(pwf, gene2cat = custom.category.list, use_genes_without_cat = TRUE) # format became different in new goseq version (021111). Does not work (042716)
  #GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
  #head(GO.pval)
  # Restrict to the requested ontology; anything other than BP/CC means MF.
  GO.pval2 <- switch(ontology,
    BP = subset(GO.pval, ontology == "BP"),
    CC = subset(GO.pval, ontology == "CC"),
    subset(GO.pval, ontology == "MF")
  )
  GO.pval2$over_represented_padjust <- p.adjust(GO.pval2$over_represented_pvalue, method = "BH")
  # which() drops NA comparisons, and testing the row count (rather than only the
  # first adjusted p-value) also covers a result with no rows for this ontology.
  enriched.GO <- GO.pval2[which(GO.pval2$over_represented_padjust < padjust), ]
  if (nrow(enriched.GO) == 0) {
    return("no enriched GO")
  }
  print("enriched.GO is")
  print(enriched.GO)
  ## write Term and Definition
  # Create the columns up front so that a skipped category stays NA instead of
  # inheriting the first row's text through recycling.
  enriched.GO$Term <- NA_character_
  enriched.GO$Definition <- NA_character_
  for (i in seq_len(nrow(enriched.GO))) {
    if (is.null(Term(GOTERM[enriched.GO[i, "category"]]))) {
      next
    }
    enriched.GO$Term[i] <- Term(GOTERM[[enriched.GO[i, "category"]]])
    enriched.GO$Definition[i] <- Definition(GOTERM[[enriched.GO[i, "category"]]])
  }
  return(enriched.GO)
}
#
head(Bra.v3.0_cdna)
## A DNAStringSet instance of length 6
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTTTC...CTGAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTTTT...GTAGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACCCG...GGAACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCTCA...GTGGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCATCT...AAGGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## [6] 3327 ATGGCGTCCACTCCTCCTCAAAA...GCGGTGGGTTTCAATTTCCTTGA BraA01g000060.3C
# length(bias) # 44239 > 45019 where the bias come from?
# bias.data vector must have the same length as DEgenes vector!
# Run the GO over-representation test once per K=8 cluster: the transcript IDs
# are grouped by cluster label and each group is passed on as the gene list.
temp <- map(
  split(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread$transcript_ID, kClusters.8),
  function(cluster.ids) {
    GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist = cluster.ids)
  }
)
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 1.732470e-08 1.0000000 18
## 423 GO:0006468 1.855801e-08 1.0000000 29
## 2750 GO:0050832 1.926103e-08 1.0000000 14
## 1858 GO:0031348 2.494123e-05 0.9999987 5
## 638 GO:0006952 3.373325e-05 0.9999904 18
## 890 GO:0009611 4.127141e-05 0.9999930 10
## 3442 GO:1900067 5.827127e-05 0.9999999 2
## 1000 GO:0009814 6.360706e-05 0.9999976 4
## 921 GO:0009651 6.434943e-05 0.9999832 15
## 3242 GO:0080119 1.300857e-04 0.9999973 3
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 423 1484 protein phosphorylation BP
## 2750 469 defense response to fungus BP
## 1858 64 negative regulation of defense response BP
## 638 1165 defense response BP
## 890 419 response to wounding BP
## 3442 3 regulation of cellular response to alkaline pH BP
## 1000 49 defense response, incompatible interaction BP
## 921 1045 response to salt stress BP
## 3242 17 ER body organization BP
## over_represented_padjust
## 2253 2.432668e-05
## 423 2.432668e-05
## 2750 2.432668e-05
## 1858 2.362558e-02
## 638 2.556305e-02
## 890 2.606289e-02
## 3442 2.709111e-02
## 1000 2.709111e-02
## 921 2.709111e-02
## 3242 4.928947e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 390 GO:0006412 3.80768e-07 1 11
## numInCat term ontology over_represented_padjust
## 390 715 translation BP 0.00144273
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 3.409045e-11 1 7
## 2259 GO:0042761 1.726215e-08 1 5
## 1181 GO:0010143 1.931256e-06 1 4
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 2259 30 very long-chain fatty acid biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## over_represented_padjust
## 1291 1.291687e-07
## 2259 3.270314e-05
## 1181 2.439177e-03
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 3.659965e-11 1.0000000 17
## 2253 GO:0042742 2.432098e-08 1.0000000 23
## 1174 GO:0010120 7.293180e-08 1.0000000 6
## 1166 GO:0010112 6.780014e-07 1.0000000 5
## 638 GO:0006952 8.277917e-07 0.9999998 27
## 899 GO:0009626 2.954187e-06 0.9999997 9
## 894 GO:0009617 3.846114e-06 0.9999994 11
## 890 GO:0009611 6.457569e-06 0.9999987 14
## 855 GO:0009409 1.517719e-05 0.9999961 17
## 944 GO:0009697 2.834944e-05 0.9999992 4
## 2750 GO:0050832 3.456585e-05 0.9999924 13
## 1205 GO:0010193 5.358930e-05 0.9999968 5
## 960 GO:0009737 7.818971e-05 0.9999759 18
## 187 GO:0002237 8.857740e-05 0.9999917 6
## 185 GO:0002229 1.515927e-04 0.9999802 7
## numInCat term ontology
## 1210 286 response to chitin BP
## 2253 726 defense response to bacterium BP
## 1174 27 camalexin biosynthetic process BP
## 1166 22 regulation of systemic acquired resistance BP
## 638 1165 defense response BP
## 899 140 plant-type hypersensitive response BP
## 894 241 response to bacterium BP
## 890 419 response to wounding BP
## 855 696 response to cold BP
## 944 21 salicylic acid biosynthetic process BP
## 2750 469 defense response to fungus BP
## 1205 54 response to ozone BP
## 960 832 response to abscisic acid BP
## 187 88 response to molecule of bacterial origin BP
## 185 126 defense response to oomycetes BP
## over_represented_padjust
## 1210 1.386761e-07
## 2253 4.607609e-05
## 1174 9.211286e-05
## 1166 6.273006e-04
## 638 6.273006e-04
## 899 1.865569e-03
## 894 2.081846e-03
## 890 3.058466e-03
## 855 6.389597e-03
## 944 1.074160e-02
## 2750 1.190636e-02
## 1205 1.692082e-02
## 960 2.278929e-02
## 187 2.397284e-02
## 185 3.829231e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732 8.264952e-07 1.0000000 5
## 3114 GO:0071369 1.490655e-06 1.0000000 5
## 3094 GO:0071281 7.588090e-06 0.9999997 5
## 254 GO:0006096 9.647819e-06 0.9999994 6
## numInCat term ontology
## 3164 52 cellular response to nitric oxide BP
## 3114 62 cellular response to ethylene stimulus BP
## 3094 77 cellular response to iron ion BP
## 254 138 glycolytic process BP
## over_represented_padjust
## 3164 0.002824047
## 3114 0.002824047
## 3094 0.009138897
## 254 0.009138897
# convert list to data.frame
# Clusters without any enriched category come back as the string
# "no enriched GO" rather than a table; unnest() cannot combine those with data
# frames, so keep only the data-frame results before stacking them.
# The output path is passed positionally because the argument is named `path`
# in older readr and `file` in newer releases.
Filter(is.data.frame, temp) %>%
  enframe(name = "cluster") %>%
  unnest(value) %>%
  write_csv("../output/twoafternoon.trtsoil.DEG.Kmeans.cluster.csv")
# 2_afternoon DEG expression data (scaled)
# Keep the transcripts with FDR < 0.05 in twoafternoon.any.trtlive.DEGs.all.v3.0anno,
# reshape to long form, attach the sample metadata and keep only the samples
# taken at sampling_time "2_afternoon".
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- cpm.timecourse.v3.0.scale %>%
  dplyr::select(-cv) %>%
  inner_join(
    twoafternoon.any.trtlive.DEGs.all.v3.0anno %>% filter(FDR < 0.05) %>% dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_time == "2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread: one row per transcript, one column per sample.
# (The previous call passed a stray `-1` as spread()'s third positional
# argument, which is `fill`; any missing transcript/sample combination would
# silently have become -1 instead of NA.)
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread <- cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread) # [1] 2178 97
## [1] 2178 97
# calculate wss (within-groups sum of squares) for k = 1..20 for an elbow plot.
# Seeded so that the randomly initialised k-means runs are reproducible.
set.seed(20)
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1], 2, var))
for (i in 2:20) {
  # The default iter.max = 10 gave "did not converge in 10 iterations", so allow 20.
  # Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
  wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
                       centers = i, iter.max = 20)$withinss)
}
plot(1:20, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")
Let’s perform the actual clustering using K=5:
# K-means with 5 centers (seeded; 1000 random starts, at most 20 iterations each)
set.seed(20)
kClust.any.trtlive.5 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 5, nstart = 1000, iter.max = 20
)
kClusters.any.trtlive.5 <- kClust.any.trtlive.5$cluster
# number of rows assigned to each cluster, with the cluster label as character
cluster.any.trtlive.5.num <- tibble(cluster = kClusters.any.trtlive.5) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.5.num
Now we can calculate the cluster ‘cores’ (aka centroids):
# find centroid in cluster
# Centroid of every cluster: a matrix with one row per sample column and one
# column per cluster label.
kClustcentroids.any.trtlive.5 <- sapply(levels(factor(kClusters.any.trtlive.5)), clust.centroid,
                                        cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1], kClusters.any.trtlive.5)
kClustcentroids.any.trtlive.5 %>% head()
## 1 2 3 4
## 1a1_q_002_S1_R1_001 -0.370624345 -0.27016316 -0.781291105 -0.09145448
## 1a3_q_004_S3_R1_001 0.586129826 -0.45516870 -0.082928903 -0.31258537
## 1a7_q_007_d8_S7_R1_001 0.603390134 -0.06622119 -0.118731825 -0.06524704
## 1a8_q_008_d8_S8_R1_001 0.513232164 -0.44547237 -0.347272642 -0.23834981
## 1c6_q_028_S22_R1_001 -0.008042207 -0.39487064 0.006183179 -0.18385917
## 1d3_q_038_S27_R1_001 0.423676033 -0.18654295 -0.166417100 -0.15567393
## 5
## 1a1_q_002_S1_R1_001 0.48593350
## 1a3_q_004_S3_R1_001 -0.04775671
## 1a7_q_007_d8_S7_R1_001 -0.02718576
## 1a8_q_008_d8_S8_R1_001 0.19646882
## 1c6_q_028_S22_R1_001 0.18640730
## 1d3_q_038_S27_R1_001 0.01866725
# adding sample description to data
data.sample <- kClustcentroids.any.trtlive.5 %>% as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.5.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean: one row per group x cluster.
### under construction ####
# distinct() keeps the first row of every group.cluster. It replaces
# slice(rep(1:768)[!duplicated(...)]), whose hard-coded 768 did not match this
# table and only worked because the logical index was recycled and slice()
# dropped the out-of-range positions.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot
p5.any.trtlive <- ggplot(data.sample, aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters", color = "Cluster", y = "scaled expression level")
p5.any.trtlive
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.5clusters.png", plot = p5.any.trtlive, width = 11, height = 6)
Let’s perform the actual clustering using K=6:
# K-means with 6 centers (seeded; 1000 random starts, at most 20 iterations each)
set.seed(20)
kClust.any.trtlive.6 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 6, nstart = 1000, iter.max = 20
)
kClusters.any.trtlive.6 <- kClust.any.trtlive.6$cluster
# number of rows assigned to each cluster, with the cluster label as character
cluster.any.trtlive.6.num <- tibble(cluster = kClusters.any.trtlive.6) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.6.num
Now we can calculate the cluster ‘cores’ (aka centroids):
# find centroid in cluster
# Centroid of every cluster: a matrix with one row per sample column and one
# column per cluster label.
kClustcentroids.any.trtlive.6 <- sapply(levels(factor(kClusters.any.trtlive.6)), clust.centroid,
                                        cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1], kClusters.any.trtlive.6)
kClustcentroids.any.trtlive.6 %>% head()
## 1 2 3 4
## 1a1_q_002_S1_R1_001 0.65133082 -0.3158853 -0.8226438 -0.37371653
## 1a3_q_004_S3_R1_001 -0.08768173 -0.5162655 -0.1554494 0.63258565
## 1a7_q_007_d8_S7_R1_001 -0.06407782 -0.0306091 -0.2095253 0.68120007
## 1a8_q_008_d8_S8_R1_001 0.31357964 -0.5018963 -0.3659202 0.54455777
## 1c6_q_028_S22_R1_001 0.07730847 -0.4843704 0.1434211 -0.03188877
## 1d3_q_038_S27_R1_001 -0.08124684 -0.1882003 -0.2357184 0.49835252
## 5 6
## 1a1_q_002_S1_R1_001 -0.05820563 -0.23794070
## 1a3_q_004_S3_R1_001 -0.22758805 -0.22464750
## 1a7_q_007_d8_S7_R1_001 -0.07040824 -0.03305671
## 1a8_q_008_d8_S8_R1_001 -0.16841988 -0.30044683
## 1c6_q_028_S22_R1_001 -0.12421671 -0.15052035
## 1d3_q_038_S27_R1_001 -0.14962567 -0.05877720
# adding sample description to data
data.sample <- kClustcentroids.any.trtlive.6 %>% as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.6.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean: one row per group x cluster.
### under construction ####
# distinct() keeps the first row of every group.cluster. It replaces
# slice(rep(1:768)[!duplicated(...)]), whose hard-coded 768 did not match this
# table and only worked because the logical index was recycled and slice()
# dropped the out-of-range positions.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot
p6.any.trtlive <- ggplot(data.sample, aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): six clusters", color = "Cluster", y = "scaled expression level")
p6.any.trtlive
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.6clusters.png", plot = p6.any.trtlive, width = 11, height = 6)
Let’s perform the actual clustering using K=8:
# K-means with 8 centers (seeded; 1000 random starts, at most 20 iterations each)
set.seed(20)
kClust.any.trtlive.8 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 8, nstart = 1000, iter.max = 20
)
kClusters.any.trtlive.8 <- kClust.any.trtlive.8$cluster
# number of rows assigned to each cluster, with the cluster label as character
cluster.any.trtlive.8.num <- tibble(cluster = kClusters.any.trtlive.8) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.8.num
Now we can calculate the cluster ‘cores’ (aka centroids):
# function to find centroid in cluster
# Centroid of one cluster: the column means of the rows of `dat` whose entry in
# `clusters` equals `i`.
#
# i:        cluster label (compared with `==`, so "1" matches 1)
# dat:      matrix / data frame / tibble, one row per clustered item
# clusters: vector of cluster labels, one per row of `dat`
# Returns a named numeric vector with one mean per column of `dat`.
clust.centroid <- function(i, dat, clusters) {
  ind <- (clusters == i)
  # drop = FALSE keeps two dimensions when the cluster has a single member;
  # without it a one-row matrix collapses to a vector and colMeans() fails.
  colMeans(dat[ind, , drop = FALSE])
}
# Centroid of every cluster: a matrix with one row per sample column and one
# column per cluster label.
kClustcentroids.any.trtlive.8 <- sapply(levels(factor(kClusters.any.trtlive.8)), clust.centroid,
                                        cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1], kClusters.any.trtlive.8)
kClustcentroids.any.trtlive.8 %>% head()
## 1 2 3 4
## 1a1_q_002_S1_R1_001 0.66774332 -0.32765717 -0.04664319 -0.3967766
## 1a3_q_004_S3_R1_001 -0.16465433 -0.35598558 -0.23130395 -0.6495724
## 1a7_q_007_d8_S7_R1_001 -0.09954455 0.07985282 -0.05066739 -0.2320192
## 1a8_q_008_d8_S8_R1_001 0.29726762 -0.41141910 -0.16242202 -0.6164058
## 1c6_q_028_S22_R1_001 0.05698738 -0.44714663 -0.12673854 -0.4659240
## 1d3_q_038_S27_R1_001 -0.06494311 -0.01126279 -0.13568017 -0.4660710
## 5 6 7 8
## 1a1_q_002_S1_R1_001 -0.15395817 -0.9249212 -0.35478101 -0.584426385
## 1a3_q_004_S3_R1_001 -0.27426807 -0.4472171 0.58951253 0.598921297
## 1a7_q_007_d8_S7_R1_001 -0.01876720 -0.3831983 0.72398958 0.051023263
## 1a8_q_008_d8_S8_R1_001 -0.28330120 -0.4472883 0.55642867 -0.032406247
## 1c6_q_028_S22_R1_001 -0.11632418 0.3594808 -0.04093214 0.001411183
## 1d3_q_038_S27_R1_001 -0.01812421 -0.2720918 0.51892604 -0.120575363
# adding sample description to data
data.sample <- kClustcentroids.any.trtlive.8 %>% as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.8.num, by = "cluster") %>%
  mutate(cluster.n = glue('{cluster2} \n({n2})',
                          cluster2 = cluster,
                          n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean: one row per group x cluster.
# distinct() keeps the first row of every group.cluster; this replaces
# slice(rep(1:768)[!duplicated(...)]), which hard-coded the row count of this
# particular table.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot
p8.any.trtlive <- ggplot(data.sample, aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))) +
  geom_point() + geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters", color = "Cluster", y = "scaled expression level")
p8.any.trtlive
# Arguments are named in full; `file=` only worked through partial matching of `filename`.
ggsave(filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.8clusters.png", plot = p8.any.trtlive, width = 11, height = 8)
Let’s perform the actual clustering using K=15:
# Fix the RNG seed before kmeans() so the random starts are reproducible.
set.seed(20)
kClust.any.trtlive.15 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 15,
  nstart = 1000,
  iter.max = 20
)
kClusters.any.trtlive.15 <- kClust.any.trtlive.15$cluster
# Number of members in each cluster. The cluster id is stored as character so
# it can be joined by "cluster" to the long-format centroid table further down.
cluster.any.trtlive.15.num <- tibble(cluster = kClusters.any.trtlive.15) %>%
  group_by(cluster) %>%
  summarize(n = n()) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.15.num
Now we can calculate the cluster ‘cores’ aka centroids:
# function to find centroid in cluster i
# Centroid of a single cluster: the column-wise mean over the rows of `dat`
# that are assigned to cluster `i`.
#
#   i        cluster label to select (matched with `==` against `clusters`)
#   dat      matrix or data frame of numeric columns, one row per clustered item
#   clusters vector of cluster assignments, one entry per row of `dat`
#
# Returns a numeric vector with one mean per column of `dat`.
clust.centroid <- function(i, dat, clusters) {
  ind <- (clusters == i)
  # drop = FALSE keeps a one-member cluster two-dimensional; without it a
  # matrix `dat` collapses to a plain vector and colMeans() fails.
  colMeans(dat[ind, , drop = FALSE])
}
# Centroid matrix: one column per cluster label, one row per column of the
# scaled expression table (the first, transcript_ID, column is excluded).
kClustcentroids.any.trtlive.15 <- sapply(
  levels(factor(kClusters.any.trtlive.15)),
  clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  clusters = kClusters.any.trtlive.15
)
head(kClustcentroids.any.trtlive.15)
## 1 2 3 4
## 1a1_q_002_S1_R1_001 0.5363413 -0.368342751 -0.1930865 -0.9943538
## 1a3_q_004_S3_R1_001 -0.7294333 -0.364642600 -0.7338475 -0.4490430
## 1a7_q_007_d8_S7_R1_001 -0.3427135 -0.002591793 -0.1843309 -0.4124467
## 1a8_q_008_d8_S8_R1_001 -0.2509873 -0.479632975 -0.5871281 -0.4363606
## 1c6_q_028_S22_R1_001 -0.2064792 -0.519653925 -0.4785148 0.4240465
## 1d3_q_038_S27_R1_001 -0.4523775 -0.096021258 -0.3338139 -0.2100288
## 5 6 7 8
## 1a1_q_002_S1_R1_001 0.53906726 0.677494289 -0.2143971 -0.53950313
## 1a3_q_004_S3_R1_001 0.04061909 0.087791367 -0.3582435 0.18574802
## 1a7_q_007_d8_S7_R1_001 0.40465292 -0.031086513 -0.1696025 0.02199784
## 1a8_q_008_d8_S8_R1_001 0.40058289 0.358359368 -0.3610603 0.14468767
## 1c6_q_028_S22_R1_001 -0.20400698 0.146627590 -0.1992854 0.49896429
## 1d3_q_038_S27_R1_001 0.11604338 -0.009315873 -0.2560809 -0.17461624
## 9 10 11 12
## 1a1_q_002_S1_R1_001 -0.09328228 -0.05176534 -0.34883557 -0.5686172
## 1a3_q_004_S3_R1_001 -0.11607911 -0.12476358 0.94170519 0.6524345
## 1a7_q_007_d8_S7_R1_001 -0.03715975 0.14627723 -0.06335732 0.7831266
## 1a8_q_008_d8_S8_R1_001 -0.08119719 -0.14666264 0.10727360 0.4921936
## 1c6_q_028_S22_R1_001 -0.06907810 0.04886612 0.18259940 -0.2771016
## 1d3_q_038_S27_R1_001 -0.04874525 0.48051250 -0.19548676 0.7062537
## 13 14 15
## 1a1_q_002_S1_R1_001 -0.61805695 -0.317859857 -0.5165267
## 1a3_q_004_S3_R1_001 0.06162628 -0.315313752 -0.6740801
## 1a7_q_007_d8_S7_R1_001 0.14337203 0.066701371 -0.0568714
## 1a8_q_008_d8_S8_R1_001 -0.16162715 -0.378009042 -0.6538674
## 1c6_q_028_S22_R1_001 -0.16897390 -0.341682014 -0.4940442
## 1d3_q_038_S27_R1_001 -0.26367077 0.002871791 -0.4328578
# Long-format centroid table: one row per sample x cluster, annotated with the
# sample description and the size of each cluster.
data.sample <- kClustcentroids.any.trtlive.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(cluster, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.15.num, by = "cluster") %>%
  # facet label: cluster id, then its member count in parentheses on a new line
  mutate(
    cluster.n = glue(
      "{cluster2} \n({n2})",
      cluster2 = cluster,
      n2 = n
    )
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: one row per group x cluster carrying the mean centroid value of
# that group, used to draw the mean line on top of the per-sample points.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  # Keep the first row of every group.cluster. This replaces a slice() over a
  # hard-coded index (1:1440) that was only correct when the joined table had
  # exactly 1440 rows.
  dplyr::distinct(group.cluster, .keep_all = TRUE)
# Plot: per-sample centroid values as points, group means as lines,
# one facet row per cluster and one facet column per sampling day.
p15.any.trtlive <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(
    axis.text = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    title = "K-means clustering of two afternoon DEGs (live vs dead soil): fifteen clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p15.any.trtlive
ggsave(
  filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.15clusters.png",
  plot = p15.any.trtlive,
  width = 11,
  height = 15
)
# 8 K-means clusters (my way using enframe(), which I am not satisfied with):
# split the transcript IDs by cluster and run the GO over-representation
# helper on each cluster's IDs, giving a list with one result per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.8
) %>%
  split(.$cluster) %>%
  map(function(x) {
    GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist = x$transcript_ID)
  })
# Convert the list to a data frame and write it out. The output path is passed
# positionally because readr >= 1.4 deprecates `path =` in favour of `file =`,
# and the positional form works with both.
temp %>%
  enframe(name = "cluster") %>%
  unnest(value) %>%
  write_csv("../output/twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv")
Julin’s method
# 5 K-means clusters: run the GO over-representation helper once per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.5
) %>%
  group_by(cluster) %>%
  # one row per cluster; its transcript IDs go into the list-column `transcripts`
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(cluster_transcripts) {
        # pull() returns the last (here the only) column as a plain vector
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster_transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 3.003137e-29 1.0000000 64
## 638 GO:0006952 1.908550e-16 1.0000000 62
## 1174 GO:0010120 2.096043e-15 1.0000000 12
## 2750 GO:0050832 4.746634e-15 1.0000000 34
## 423 GO:0006468 2.673140e-14 1.0000000 73
## 890 GO:0009611 8.202025e-13 1.0000000 31
## 894 GO:0009617 4.432493e-12 1.0000000 23
## 3708 GO:2000022 2.023093e-11 1.0000000 14
## 1001 GO:0009816 8.965637e-11 1.0000000 15
## 1210 GO:0010200 2.735995e-10 1.0000000 21
## 1166 GO:0010112 8.973684e-10 1.0000000 8
## 185 GO:0002229 2.263061e-09 1.0000000 16
## 1857 GO:0031347 6.329428e-09 1.0000000 12
## 973 GO:0009753 1.218529e-07 1.0000000 20
## 853 GO:0009407 2.270165e-07 1.0000000 8
## 557 GO:0006749 5.668507e-07 1.0000000 9
## 899 GO:0009626 6.276177e-07 0.9999999 13
## 3438 GO:1900056 1.332557e-06 0.9999999 6
## 1183 GO:0010150 1.511624e-06 0.9999997 15
## 2301 GO:0043069 2.769559e-06 0.9999999 6
## 3442 GO:1900067 3.220011e-06 1.0000000 3
## 972 GO:0009751 5.643181e-06 0.9999986 17
## 3465 GO:1900457 6.387716e-06 0.9999999 4
## 646 GO:0006979 7.257172e-06 0.9999979 21
## 1858 GO:0031348 1.300314e-05 0.9999986 8
## 1205 GO:0010193 1.305263e-05 0.9999989 7
## 855 GO:0009409 1.367625e-05 0.9999954 25
## 3007 GO:0070370 1.432509e-05 0.9999994 5
## 900 GO:0009627 1.760788e-05 0.9999977 9
## 944 GO:0009697 2.460612e-05 0.9999989 5
## 2917 GO:0055114 2.523534e-05 0.9999874 53
## 2040 GO:0034087 2.900227e-05 0.9999993 4
## 3023 GO:0070550 2.900227e-05 0.9999993 4
## 3075 GO:0071169 2.900227e-05 0.9999993 4
## 3165 GO:0071733 2.900227e-05 0.9999993 4
## 3657 GO:1905406 2.900227e-05 0.9999993 4
## 3685 GO:1990414 2.900227e-05 0.9999993 4
## 942 GO:0009695 2.924173e-05 0.9999972 7
## 3322 GO:0090333 4.915444e-05 0.9999960 6
## 921 GO:0009651 6.652211e-05 0.9999716 32
## 1002 GO:0009817 8.221461e-05 0.9999885 8
## 1163 GO:0010106 8.638171e-05 0.9999967 4
## 1737 GO:0019761 9.623650e-05 0.9999886 7
## 558 GO:0006750 9.657666e-05 0.9999987 3
## 1035 GO:0009867 1.114810e-04 0.9999811 9
## 932 GO:0009682 1.469952e-04 0.9999894 5
## 898 GO:0009625 1.473365e-04 0.9999811 7
## 975 GO:0009759 1.657197e-04 0.9999925 4
## 3286 GO:0080185 1.670531e-04 0.9999969 3
## 933 GO:0009684 1.710238e-04 0.9999924 4
## 2794 GO:0051258 1.756902e-04 0.9999972 3
## 32 GO:0000162 2.093468e-04 0.9999836 5
## 485 GO:0006569 2.122848e-04 0.9999960 3
## 619 GO:0006887 2.237554e-04 0.9999637 8
## 895 GO:0009620 2.420533e-04 0.9999491 10
## 687 GO:0007076 2.592800e-04 0.9999872 4
## 2916 GO:0055091 2.727204e-04 1.0000000 2
## 3006 GO:0070328 2.727204e-04 1.0000000 2
## 2889 GO:0052544 2.788945e-04 0.9999770 5
## 1031 GO:0009863 2.898328e-04 0.9999755 5
## 3716 GO:2000031 3.016855e-04 0.9999742 5
## 2940 GO:0060548 3.416445e-04 0.9999815 4
## 1917 GO:0032260 3.874504e-04 0.9999985 2
## 3251 GO:0080142 4.220034e-04 0.9999756 4
## 642 GO:0006970 4.414010e-04 0.9998817 12
## 2582 GO:0046777 4.464102e-04 0.9998467 17
## 187 GO:0002237 6.107270e-04 0.9999005 7
## 1161 GO:0010104 7.951298e-04 0.9999696 3
## 2495 GO:0045927 8.705170e-04 0.9999667 3
## numInCat
## 2253 726
## 638 1165
## 1174 27
## 2750 469
## 423 1484
## 890 419
## 894 241
## 3708 89
## 1001 97
## 1210 286
## 1166 22
## 185 126
## 1857 95
## 973 338
## 853 67
## 557 85
## 899 140
## 3438 26
## 1183 219
## 2301 30
## 3442 3
## 972 347
## 3465 10
## 646 502
## 1858 64
## 1205 54
## 855 696
## 3007 21
## 900 104
## 944 21
## 2917 1923
## 2040 10
## 3023 10
## 3075 10
## 3165 10
## 3657 10
## 3685 10
## 942 47
## 3322 40
## 921 1045
## 1002 100
## 1163 19
## 1737 69
## 558 6
## 1035 126
## 932 33
## 898 84
## 975 19
## 3286 8
## 933 17
## 2794 6
## 32 35
## 485 7
## 619 96
## 895 150
## 687 16
## 2916 2
## 3006 2
## 2889 31
## 1031 38
## 3716 38
## 2940 18
## 1917 3
## 3251 19
## 642 252
## 2582 363
## 187 88
## 1161 17
## 2495 14
## term
## 2253 defense response to bacterium
## 638 defense response
## 1174 camalexin biosynthetic process
## 2750 defense response to fungus
## 423 protein phosphorylation
## 890 response to wounding
## 894 response to bacterium
## 3708 regulation of jasmonic acid mediated signaling pathway
## 1001 defense response to bacterium, incompatible interaction
## 1210 response to chitin
## 1166 regulation of systemic acquired resistance
## 185 defense response to oomycetes
## 1857 regulation of defense response
## 973 response to jasmonic acid
## 853 toxin catabolic process
## 557 glutathione metabolic process
## 899 plant-type hypersensitive response
## 3438 negative regulation of leaf senescence
## 1183 leaf senescence
## 2301 negative regulation of programmed cell death
## 3442 regulation of cellular response to alkaline pH
## 972 response to salicylic acid
## 3465 regulation of brassinosteroid mediated signaling pathway
## 646 response to oxidative stress
## 1858 negative regulation of defense response
## 1205 response to ozone
## 855 response to cold
## 3007 cellular heat acclimation
## 900 systemic acquired resistance
## 944 salicylic acid biosynthetic process
## 2917 oxidation-reduction process
## 2040 establishment of mitotic sister chromatid cohesion
## 3023 rDNA condensation
## 3075 establishment of protein localization to chromatin
## 3165 transcriptional activation by promoter-enhancer looping
## 3657 positive regulation of mitotic cohesin loading
## 3685 replication-born double-strand break repair via sister chromatid exchange
## 942 jasmonic acid biosynthetic process
## 3322 regulation of stomatal closure
## 921 response to salt stress
## 1002 defense response to fungus, incompatible interaction
## 1163 cellular response to iron ion starvation
## 1737 glucosinolate biosynthetic process
## 558 glutathione biosynthetic process
## 1035 jasmonic acid mediated signaling pathway
## 932 induced systemic resistance
## 898 response to insect
## 975 indole glucosinolate biosynthetic process
## 3286 effector dependent induction by symbiont of host immune response
## 933 indoleacetic acid biosynthetic process
## 2794 protein polymerization
## 32 tryptophan biosynthetic process
## 485 tryptophan catabolic process
## 619 exocytosis
## 895 response to fungus
## 687 mitotic chromosome condensation
## 2916 phospholipid homeostasis
## 3006 triglyceride homeostasis
## 2889 defense response by callose deposition in cell wall
## 1031 salicylic acid mediated signaling pathway
## 3716 regulation of salicylic acid mediated signaling pathway
## 2940 negative regulation of cell death
## 1917 response to jasmonic acid stimulus involved in jasmonic acid and ethylene-dependent systemic resistance
## 3251 regulation of salicylic acid biosynthetic process
## 642 response to osmotic stress
## 2582 protein autophosphorylation
## 187 response to molecule of bacterial origin
## 1161 regulation of ethylene-activated signaling pathway
## 2495 positive regulation of growth
## ontology over_represented_padjust
## 2253 BP 1.137889e-25
## 638 BP 3.615748e-13
## 1174 BP 2.647302e-12
## 2750 BP 4.496249e-12
## 423 BP 2.025706e-11
## 890 BP 5.179579e-10
## 894 BP 2.399245e-09
## 3708 BP 9.581874e-09
## 1001 BP 3.774533e-08
## 1210 BP 1.036668e-07
## 1166 BP 3.091026e-07
## 185 BP 7.145614e-07
## 1857 BP 1.844785e-06
## 973 BP 3.297861e-05
## 853 BP 5.734436e-05
## 557 BP 1.342373e-04
## 899 BP 1.398849e-04
## 3438 BP 2.805032e-04
## 1183 BP 3.014497e-04
## 2301 BP 5.246930e-04
## 3442 BP 5.809819e-04
## 972 BP 9.719097e-04
## 3465 BP 1.052307e-03
## 646 BP 1.145726e-03
## 1858 BP 1.902170e-03
## 1205 BP 1.902170e-03
## 855 BP 1.919234e-03
## 3007 BP 1.938492e-03
## 900 BP 2.300560e-03
## 944 BP 2.915709e-03
## 2917 BP 2.915709e-03
## 2040 BP 2.915709e-03
## 3023 BP 2.915709e-03
## 3075 BP 2.915709e-03
## 3165 BP 2.915709e-03
## 3657 BP 2.915709e-03
## 3685 BP 2.915709e-03
## 942 BP 2.915709e-03
## 3322 BP 4.775543e-03
## 921 BP 6.301307e-03
## 1002 BP 7.597833e-03
## 1163 BP 7.792864e-03
## 1737 BP 8.316567e-03
## 558 BP 8.316567e-03
## 1035 BP 9.386699e-03
## 932 BP 1.187783e-02
## 898 BP 1.187783e-02
## 975 BP 1.291763e-02
## 3286 BP 1.291763e-02
## 933 BP 1.296019e-02
## 2794 BP 1.305275e-02
## 32 BP 1.517636e-02
## 485 BP 1.517636e-02
## 619 BP 1.570017e-02
## 895 BP 1.667527e-02
## 687 BP 1.754307e-02
## 2916 BP 1.781616e-02
## 3006 BP 1.781616e-02
## 2889 BP 1.791070e-02
## 1031 BP 1.830294e-02
## 3716 BP 1.873912e-02
## 2940 BP 2.087889e-02
## 1917 BP 2.330237e-02
## 3251 BP 2.498392e-02
## 642 BP 2.562800e-02
## 2582 BP 2.562800e-02
## 187 BP 3.453798e-02
## 1161 BP 4.430510e-02
## 2495 BP 4.780274e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 8.680462e-11 1.0000000 9
## 972 GO:0009751 5.938723e-08 1.0000000 15
## 515 GO:0006631 5.340014e-07 1.0000000 8
## 514 GO:0006629 4.303455e-06 0.9999994 11
## 1181 GO:0010143 6.172288e-06 0.9999998 5
## 1695 GO:0019441 1.951088e-05 0.9999998 3
## 2917 GO:0055114 4.354688e-05 0.9999818 33
## 967 GO:0009744 7.490509e-05 0.9999913 7
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 972 347 response to salicylic acid BP
## 515 85 fatty acid metabolic process BP
## 514 220 lipid metabolic process BP
## 1181 29 cutin biosynthetic process BP
## 1695 7 tryptophan catabolic process to kynurenine BP
## 2917 1923 oxidation-reduction process BP
## 967 136 response to sucrose BP
## over_represented_padjust
## 1291 3.289027e-07
## 972 1.125091e-04
## 515 6.744437e-04
## 514 4.076447e-03
## 1181 4.677360e-03
## 1695 1.232112e-02
## 2917 2.357131e-02
## 967 3.547692e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732 8.887227e-12 1.0000000 13
## 1210 GO:0010200 1.371146e-11 1.0000000 26
## 638 GO:0006952 4.650995e-11 1.0000000 57
## 3094 GO:0071281 2.322895e-10 1.0000000 14
## 853 GO:0009407 9.886978e-08 1.0000000 10
## 3114 GO:0071369 1.468447e-07 1.0000000 10
## 3708 GO:2000022 6.386429e-07 0.9999999 11
## 359 GO:0006355 1.025051e-06 0.9999996 90
## 557 GO:0006749 1.661546e-06 0.9999998 10
## 3125 GO:0071456 2.270396e-06 0.9999999 7
## 1166 GO:0010112 2.374497e-06 0.9999999 6
## 890 GO:0009611 4.880059e-06 0.9999985 23
## 646 GO:0006979 1.566004e-05 0.9999948 24
## 1039 GO:0009873 2.803104e-05 0.9999917 19
## 1919 GO:0032268 4.851525e-05 0.9999996 3
## 1857 GO:0031347 5.033797e-05 0.9999924 9
## 2253 GO:0042742 5.896546e-05 0.9999758 30
## 973 GO:0009753 6.047415e-05 0.9999818 18
## 2525 GO:0046256 7.775999e-05 0.9999991 3
## 2750 GO:0050832 1.107468e-04 0.9999611 21
## 914 GO:0009644 1.397202e-04 0.9999757 9
## 475 GO:0006559 1.521539e-04 0.9999892 5
## 1162 GO:0010105 2.563072e-04 0.9999794 5
## 2907 GO:0055072 2.953109e-04 0.9999579 7
## numInCat term
## 3164 52 cellular response to nitric oxide
## 1210 286 response to chitin
## 638 1165 defense response
## 3094 77 cellular response to iron ion
## 853 67 toxin catabolic process
## 3114 62 cellular response to ethylene stimulus
## 3708 89 regulation of jasmonic acid mediated signaling pathway
## 359 2992 regulation of transcription, DNA-templated
## 557 85 glutathione metabolic process
## 3125 35 cellular response to hypoxia
## 1166 22 regulation of systemic acquired resistance
## 890 419 response to wounding
## 646 502 response to oxidative stress
## 1039 364 ethylene-activated signaling pathway
## 1919 5 regulation of cellular protein metabolic process
## 1857 95 regulation of defense response
## 2253 726 defense response to bacterium
## 973 338 response to jasmonic acid
## 2525 6 2,4,6-trinitrotoluene catabolic process
## 2750 469 defense response to fungus
## 914 109 response to high light intensity
## 475 27 L-phenylalanine catabolic process
## 1162 29 negative regulation of ethylene-activated signaling pathway
## 2907 69 iron ion homeostasis
## ontology over_represented_padjust
## 3164 BP 2.597635e-08
## 1210 BP 2.597635e-08
## 638 BP 5.874207e-08
## 3094 BP 2.200362e-07
## 853 BP 7.492352e-05
## 3114 BP 9.273245e-05
## 3708 BP 3.456883e-04
## 359 BP 4.854897e-04
## 557 BP 6.995107e-04
## 3125 BP 8.179061e-04
## 1166 BP 8.179061e-04
## 890 BP 1.540879e-03
## 646 BP 4.564300e-03
## 1039 BP 7.586400e-03
## 1919 BP 1.192066e-02
## 1857 BP 1.192066e-02
## 2253 BP 1.272981e-02
## 973 BP 1.272981e-02
## 2525 BP 1.550698e-02
## 2750 BP 2.098098e-02
## 914 BP 2.520952e-02
## 475 BP 2.620504e-02
## 1162 BP 4.222383e-02
## 2907 BP 4.662220e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
# Flatten the per-cluster GO results with unnest() and write them to CSV.
# The output path is passed positionally because readr >= 1.4 deprecates
# `path =` in favour of `file =`, and the positional form works with both.
temp %>%
  unnest(GO_result) %>%
  write_csv("../output/twoafternoon.any.trtsoil.DEG.Kmeans.5cluster.csv")
# 6 K-means clusters: run the GO over-representation helper once per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.6
) %>%
  group_by(cluster) %>%
  # one row per cluster; its transcript IDs go into the list-column `transcripts`
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(cluster_transcripts) {
        # pull() returns the last (here the only) column as a plain vector
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster_transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760 6.666328e-07 1.0000000 3
## 1449 GO:0015714 2.310538e-06 1.0000000 3
## 2143 GO:0035436 3.733924e-06 1.0000000 3
## 1448 GO:0015713 7.917811e-06 0.9999999 3
## 359 GO:0006355 9.951440e-06 0.9999967 26
## numInCat term ontology
## 1462 5 glucose-6-phosphate transport BP
## 1449 7 phosphoenolpyruvate transport BP
## 2143 8 triose phosphate transmembrane transport BP
## 1448 10 phosphoglycerate transmembrane transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## over_represented_padjust
## 1462 0.002525872
## 1449 0.004377314
## 2143 0.004715947
## 1448 0.007500146
## 359 0.007541201
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120 4.637858e-14 1.0000000 11
## 890 GO:0009611 8.865680e-13 1.0000000 29
## 2253 GO:0042742 2.390883e-12 1.0000000 38
## 638 GO:0006952 4.243378e-12 1.0000000 49
## 1166 GO:0010112 6.196936e-12 1.0000000 9
## 1210 GO:0010200 5.044516e-09 1.0000000 19
## 853 GO:0009407 2.034535e-08 1.0000000 9
## 1205 GO:0010193 3.190937e-08 1.0000000 9
## 557 GO:0006749 3.934100e-08 1.0000000 10
## 894 GO:0009617 4.632011e-08 1.0000000 17
## 899 GO:0009626 1.434417e-07 1.0000000 13
## 2750 GO:0050832 4.795283e-07 0.9999999 21
## 1001 GO:0009816 1.494838e-06 0.9999998 10
## 1183 GO:0010150 1.985337e-06 0.9999996 14
## 3708 GO:2000022 2.299477e-06 0.9999998 9
## 973 GO:0009753 2.974438e-06 0.9999993 17
## 972 GO:0009751 1.106489e-05 0.9999973 16
## 3438 GO:1900056 2.287173e-05 0.9999989 5
## 1857 GO:0031347 2.543493e-05 0.9999970 8
## 185 GO:0002229 2.874628e-05 0.9999954 10
## 1002 GO:0009817 3.540237e-05 0.9999956 8
## 423 GO:0006468 5.825218e-05 0.9999721 43
## 234 GO:0006032 6.100577e-05 0.9999963 5
## 619 GO:0006887 7.009588e-05 0.9999904 8
## 630 GO:0006904 1.164745e-04 0.9999857 7
## 933 GO:0009684 1.249085e-04 0.9999949 4
## 2889 GO:0052544 1.346118e-04 0.9999905 5
## 485 GO:0006569 1.497008e-04 0.9999975 3
## 2237 GO:0042538 1.591235e-04 0.9999752 8
## 1031 GO:0009863 1.740887e-04 0.9999868 5
## 2986 GO:0062034 2.044306e-04 0.9999960 3
## 2400 GO:0044419 2.250712e-04 0.9999954 3
## 1035 GO:0009867 2.299621e-04 0.9999621 8
## 944 GO:0009697 2.354733e-04 0.9999881 4
## 2859 GO:0051707 2.927450e-04 0.9999577 7
## 900 GO:0009627 3.694357e-04 0.9999443 7
## 895 GO:0009620 3.731450e-04 0.9999250 9
## 2034 GO:0034052 4.029842e-04 0.9999891 3
## 2917 GO:0055114 4.689601e-04 0.9997473 45
## 1737 GO:0019761 5.010736e-04 0.9999355 6
## numInCat term
## 1174 27 camalexin biosynthetic process
## 890 419 response to wounding
## 2253 726 defense response to bacterium
## 638 1165 defense response
## 1166 22 regulation of systemic acquired resistance
## 1210 286 response to chitin
## 853 67 toxin catabolic process
## 1205 54 response to ozone
## 557 85 glutathione metabolic process
## 894 241 response to bacterium
## 899 140 plant-type hypersensitive response
## 2750 469 defense response to fungus
## 1001 97 defense response to bacterium, incompatible interaction
## 1183 219 leaf senescence
## 3708 89 regulation of jasmonic acid mediated signaling pathway
## 973 338 response to jasmonic acid
## 972 347 response to salicylic acid
## 3438 26 negative regulation of leaf senescence
## 1857 95 regulation of defense response
## 185 126 defense response to oomycetes
## 1002 100 defense response to fungus, incompatible interaction
## 423 1484 protein phosphorylation
## 234 35 chitin catabolic process
## 619 96 exocytosis
## 630 73 vesicle docking involved in exocytosis
## 933 17 indoleacetic acid biosynthetic process
## 2889 31 defense response by callose deposition in cell wall
## 485 7 tryptophan catabolic process
## 2237 132 hyperosmotic salinity response
## 1031 38 salicylic acid mediated signaling pathway
## 2986 8 L-pipecolic acid biosynthetic process
## 2400 8 interspecies interaction between organisms
## 1035 126 jasmonic acid mediated signaling pathway
## 944 21 salicylic acid biosynthetic process
## 2859 97 response to other organism
## 900 104 systemic acquired resistance
## 895 150 response to fungus
## 2034 10 positive regulation of plant-type hypersensitive response
## 2917 1923 oxidation-reduction process
## 1737 69 glucosinolate biosynthetic process
## ontology over_represented_padjust
## 1174 BP 1.757284e-10
## 890 BP 1.679603e-09
## 2253 BP 3.019685e-09
## 638 BP 4.019540e-09
## 1166 BP 4.696038e-09
## 1210 BP 3.185612e-06
## 853 BP 1.101265e-05
## 1205 BP 1.511308e-05
## 557 BP 1.656256e-05
## 894 BP 1.755069e-05
## 899 BP 4.940915e-05
## 2750 BP 1.514111e-04
## 1001 BP 4.356877e-04
## 1183 BP 5.373174e-04
## 3708 BP 5.808478e-04
## 973 BP 7.043841e-04
## 972 BP 2.466169e-03
## 3438 BP 4.814499e-03
## 1857 BP 5.072261e-03
## 185 BP 5.445983e-03
## 1002 BP 6.387599e-03
## 423 BP 1.003261e-02
## 234 BP 1.005004e-02
## 619 BP 1.106639e-02
## 630 BP 1.765288e-02
## 933 BP 1.820301e-02
## 2889 BP 1.889052e-02
## 485 BP 2.025772e-02
## 2237 BP 2.079030e-02
## 1031 BP 2.198741e-02
## 2986 BP 2.498670e-02
## 2400 BP 2.624142e-02
## 1035 BP 2.624142e-02
## 944 BP 2.624142e-02
## 2859 BP 3.169174e-02
## 900 BP 3.821207e-02
## 895 BP 3.821207e-02
## 2034 BP 4.018176e-02
## 2917 BP 4.556128e-02
## 1737 BP 4.746420e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 2.555181e-13 1.0000000 10
## 514 GO:0006629 4.812957e-07 0.9999999 11
## 1181 GO:0010143 2.014640e-06 0.9999999 5
## 972 GO:0009751 9.112896e-06 0.9999985 11
## 2917 GO:0055114 9.199414e-06 0.9999967 30
## 1695 GO:0019441 1.005513e-05 0.9999999 3
## 515 GO:0006631 2.199033e-05 0.9999984 6
## 646 GO:0006979 5.372240e-05 0.9999886 12
## 953 GO:0009725 1.054515e-04 0.9999981 3
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 514 220 lipid metabolic process BP
## 1181 29 cutin biosynthetic process BP
## 972 347 response to salicylic acid BP
## 2917 1923 oxidation-reduction process BP
## 1695 7 tryptophan catabolic process to kynurenine BP
## 515 85 fatty acid metabolic process BP
## 646 502 response to oxidative stress BP
## 953 14 response to hormone BP
## over_represented_padjust
## 1291 9.681580e-10
## 514 9.118147e-04
## 1181 2.544490e-03
## 972 6.349817e-03
## 2917 6.349817e-03
## 1695 6.349817e-03
## 515 1.190305e-02
## 646 2.544427e-02
## 953 4.439510e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732 1.250124e-11 1.0000000 12
## 3094 GO:0071281 1.727152e-10 1.0000000 13
## 1210 GO:0010200 2.261492e-10 1.0000000 22
## 359 GO:0006355 1.182921e-07 1.0000000 79
## 3114 GO:0071369 2.654178e-07 1.0000000 9
## 914 GO:0009644 5.003516e-07 0.9999999 11
## 3125 GO:0071456 5.590323e-07 1.0000000 7
## 638 GO:0006952 5.686545e-07 0.9999998 41
## 2239 GO:0042542 2.587505e-06 0.9999996 11
## 646 GO:0006979 5.123726e-06 0.9999985 22
## 1166 GO:0010112 1.563231e-05 0.9999993 5
## 1919 GO:0032268 2.630607e-05 0.9999998 3
## 3708 GO:2000022 4.877420e-05 0.9999937 8
## 854 GO:0009408 5.008081e-05 0.9999871 15
## 1039 GO:0009873 7.742102e-05 0.9999781 16
## 1270 GO:0010286 1.014015e-04 0.9999853 8
## 2253 GO:0042742 1.302990e-04 0.9999489 25
## 2795 GO:0051259 1.328760e-04 0.9999942 4
## 2750 GO:0050832 1.702764e-04 0.9999439 18
## 853 GO:0009407 2.442848e-04 0.9999726 6
## 3117 GO:0071398 2.532181e-04 0.9999944 3
## 170 GO:0001944 2.905167e-04 0.9999752 5
## 2487 GO:0045893 3.025579e-04 0.9998886 20
## numInCat term ontology
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## 1210 286 response to chitin BP
## 359 2992 regulation of transcription, DNA-templated BP
## 3114 62 cellular response to ethylene stimulus BP
## 914 109 response to high light intensity BP
## 3125 35 cellular response to hypoxia BP
## 638 1165 defense response BP
## 2239 130 response to hydrogen peroxide BP
## 646 502 response to oxidative stress BP
## 1166 22 regulation of systemic acquired resistance BP
## 1919 5 regulation of cellular protein metabolic process BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 854 305 response to heat BP
## 1039 364 ethylene-activated signaling pathway BP
## 1270 99 heat acclimation BP
## 2253 726 defense response to bacterium BP
## 2795 22 protein complex oligomerization BP
## 2750 469 defense response to fungus BP
## 853 67 toxin catabolic process BP
## 3117 9 cellular response to fatty acid BP
## 170 41 vasculature development BP
## 2487 570 positive regulation of transcription, DNA-templated BP
## over_represented_padjust
## 3164 4.736720e-08
## 3094 2.856265e-07
## 1210 2.856265e-07
## 359 1.120522e-04
## 3114 2.011336e-04
## 914 2.693290e-04
## 3125 2.693290e-04
## 638 2.693290e-04
## 2239 1.089340e-03
## 646 1.941380e-03
## 1166 5.384620e-03
## 1919 8.306142e-03
## 3708 1.355401e-02
## 854 1.355401e-02
## 1039 1.955655e-02
## 1270 2.401315e-02
## 2253 2.797040e-02
## 2795 2.797040e-02
## 2750 3.395671e-02
## 853 4.568779e-02
## 3117 4.568779e-02
## 170 4.984312e-02
## 2487 4.984312e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 2.066964e-18 1.0000000 37
## 423 GO:0006468 1.525619e-13 1.0000000 50
## 2750 GO:0050832 1.327629e-12 1.0000000 23
## 638 GO:0006952 7.917829e-11 1.0000000 36
## 3708 GO:2000022 9.773011e-10 1.0000000 10
## 1857 GO:0031347 4.231062e-07 1.0000000 8
## 26 GO:0000103 8.320928e-07 1.0000000 5
## 1210 GO:0010200 5.184900e-06 0.9999992 11
## 890 GO:0009611 1.488002e-05 0.9999968 14
## 3007 GO:0070370 2.605475e-05 0.9999993 4
## 1858 GO:0031348 3.037671e-05 0.9999977 6
## 642 GO:0006970 4.109078e-05 0.9999930 10
## 973 GO:0009753 6.199438e-05 0.9999877 11
## 3009 GO:0070417 1.176377e-04 0.9999916 5
## 900 GO:0009627 1.462438e-04 0.9999849 6
## 942 GO:0009695 1.484271e-04 0.9999890 5
## 2040 GO:0034087 1.497554e-04 0.9999971 3
## 3023 GO:0070550 1.497554e-04 0.9999971 3
## 3075 GO:0071169 1.497554e-04 0.9999971 3
## 3165 GO:0071733 1.497554e-04 0.9999971 3
## 3657 GO:1905406 1.497554e-04 0.9999971 3
## 3685 GO:1990414 1.497554e-04 0.9999971 3
## 3442 GO:1900067 1.797488e-04 0.9999995 2
## 894 GO:0009617 2.100138e-04 0.9999607 9
## 2917 GO:0055114 2.219751e-04 0.9999010 31
## 2223 GO:0042344 2.342897e-04 0.9999943 3
## numInCat
## 2253 726
## 423 1484
## 2750 469
## 638 1165
## 3708 89
## 1857 95
## 26 24
## 1210 286
## 890 419
## 3007 21
## 1858 64
## 642 252
## 973 338
## 3009 54
## 900 104
## 942 47
## 2040 10
## 3023 10
## 3075 10
## 3165 10
## 3657 10
## 3685 10
## 3442 3
## 894 241
## 2917 1923
## 2223 15
## term
## 2253 defense response to bacterium
## 423 protein phosphorylation
## 2750 defense response to fungus
## 638 defense response
## 3708 regulation of jasmonic acid mediated signaling pathway
## 1857 regulation of defense response
## 26 sulfate assimilation
## 1210 response to chitin
## 890 response to wounding
## 3007 cellular heat acclimation
## 1858 negative regulation of defense response
## 642 response to osmotic stress
## 973 response to jasmonic acid
## 3009 cellular response to cold
## 900 systemic acquired resistance
## 942 jasmonic acid biosynthetic process
## 2040 establishment of mitotic sister chromatid cohesion
## 3023 rDNA condensation
## 3075 establishment of protein localization to chromatin
## 3165 transcriptional activation by promoter-enhancer looping
## 3657 positive regulation of mitotic cohesin loading
## 3685 replication-born double-strand break repair via sister chromatid exchange
## 3442 regulation of cellular response to alkaline pH
## 894 response to bacterium
## 2917 oxidation-reduction process
## 2223 indole glucosinolate catabolic process
## ontology over_represented_padjust
## 2253 BP 7.831726e-15
## 423 BP 2.890285e-10
## 2750 BP 1.676795e-09
## 638 BP 7.500164e-08
## 3708 BP 7.405988e-07
## 1857 BP 2.671916e-04
## 26 BP 4.503999e-04
## 1210 BP 2.455698e-03
## 890 BP 6.264487e-03
## 3007 BP 9.872143e-03
## 1858 BP 1.046340e-02
## 642 BP 1.297441e-02
## 973 BP 1.806898e-02
## 3009 BP 2.579197e-02
## 900 BP 2.579197e-02
## 942 BP 2.579197e-02
## 2040 BP 2.579197e-02
## 3023 BP 2.579197e-02
## 3075 BP 2.579197e-02
## 3165 BP 2.579197e-02
## 3657 BP 2.579197e-02
## 3685 BP 2.579197e-02
## 3442 BP 2.961165e-02
## 894 BP 3.315592e-02
## 2917 BP 3.364255e-02
## 2223 BP 3.414322e-02
# Expand the GO_result list-column with unnest() and save the table as CSV.
# The output path is passed positionally on purpose: readr >= 1.4.0 deprecates
# the `path=` argument in favour of `file=`, while older readr only knows
# `path=`, so the unnamed form is the one that runs cleanly on both.
# file.path() mirrors how the input files are located at the top of this file
# and yields the same "../output/..." string.
# NOTE(review): the file name says "trtsoil" while the objects used in this
# section are named "trtlive" -- confirm the name is intended.
temp %>%
  unnest(GO_result) %>%
  write_csv(file.path("..", "output", "twoafternoon.any.trtsoil.DEG.Kmeans.6cluster.csv"))
# K-means with 8 clusters: nest the transcript IDs of every cluster and run the
# GO over-representation helper once per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.8
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # each element is the nested one-column tibble of a cluster; pull()
      # extracts that column (transcript_ID) as a plain vector for the helper.
      # An explicit function is used here in place of the purrr "~" shorthand.
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760 5.623992e-07 1.0000000 3
## 359 GO:0006355 1.096760e-06 0.9999997 27
## 1449 GO:0015714 1.949991e-06 1.0000000 3
## 2143 GO:0035436 3.151415e-06 1.0000000 3
## 1448 GO:0015713 6.685127e-06 1.0000000 3
## numInCat term ontology
## 1462 5 glucose-6-phosphate transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## 1449 7 phosphoenolpyruvate transport BP
## 2143 8 triose phosphate transmembrane transport BP
## 1448 10 phosphoglycerate transmembrane transport BP
## over_represented_padjust
## 1462 0.002077812
## 359 0.002077812
## 1449 0.002462839
## 2143 0.002985178
## 1448 0.005065989
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120 3.933902e-14 1.0000000 10
## 2253 GO:0042742 8.640276e-12 1.0000000 30
## 1166 GO:0010112 1.217158e-11 1.0000000 8
## 890 GO:0009611 3.002585e-10 1.0000000 21
## 638 GO:0006952 2.886080e-09 1.0000000 34
## 2750 GO:0050832 8.711643e-08 1.0000000 18
## 557 GO:0006749 2.512131e-07 1.0000000 8
## 853 GO:0009407 3.046529e-07 1.0000000 7
## 1210 GO:0010200 1.093472e-06 0.9999998 13
## 899 GO:0009626 1.204013e-06 0.9999999 10
## 894 GO:0009617 2.637069e-06 0.9999996 12
## 900 GO:0009627 3.095308e-06 0.9999997 8
## 3438 GO:1900056 3.275946e-06 0.9999999 5
## 2237 GO:0042538 9.409826e-06 0.9999990 8
## 1031 GO:0009863 2.617001e-05 0.9999987 5
## 933 GO:0009684 2.626258e-05 0.9999993 4
## 619 GO:0006887 3.813169e-05 0.9999961 7
## 485 GO:0006569 4.543349e-05 0.9999995 3
## 2986 GO:0062034 6.221247e-05 0.9999992 3
## 2400 GO:0044419 6.855688e-05 0.9999991 3
## 898 GO:0009625 8.048379e-05 0.9999926 6
## 3708 GO:2000022 1.235004e-04 0.9999877 6
## 2034 GO:0034052 1.235119e-04 0.9999978 3
## 972 GO:0009751 2.045166e-04 0.9999533 11
## 477 GO:0006561 2.669627e-04 0.9999933 3
## 932 GO:0009682 2.729335e-04 0.9999850 4
## 185 GO:0002229 3.590825e-04 0.9999460 7
## 646 GO:0006979 3.684017e-04 0.9998966 13
## 1001 GO:0009816 3.687152e-04 0.9999547 6
## 32 GO:0000162 3.720090e-04 0.9999777 4
## 2230 GO:0042372 4.057400e-04 0.9999876 3
## numInCat term
## 1174 27 camalexin biosynthetic process
## 2253 726 defense response to bacterium
## 1166 22 regulation of systemic acquired resistance
## 890 419 response to wounding
## 638 1165 defense response
## 2750 469 defense response to fungus
## 557 85 glutathione metabolic process
## 853 67 toxin catabolic process
## 1210 286 response to chitin
## 899 140 plant-type hypersensitive response
## 894 241 response to bacterium
## 900 104 systemic acquired resistance
## 3438 26 negative regulation of leaf senescence
## 2237 132 hyperosmotic salinity response
## 1031 38 salicylic acid mediated signaling pathway
## 933 17 indoleacetic acid biosynthetic process
## 619 96 exocytosis
## 485 7 tryptophan catabolic process
## 2986 8 L-pipecolic acid biosynthetic process
## 2400 8 interspecies interaction between organisms
## 898 84 response to insect
## 3708 89 regulation of jasmonic acid mediated signaling pathway
## 2034 10 positive regulation of plant-type hypersensitive response
## 972 347 response to salicylic acid
## 477 13 proline biosynthetic process
## 932 33 induced systemic resistance
## 185 126 defense response to oomycetes
## 646 502 response to oxidative stress
## 1001 97 defense response to bacterium, incompatible interaction
## 32 35 tryptophan biosynthetic process
## 2230 18 phylloquinone biosynthetic process
## ontology over_represented_padjust
## 1174 BP 1.490555e-10
## 2253 BP 1.537270e-08
## 1166 BP 1.537270e-08
## 890 BP 2.844199e-07
## 638 BP 2.187072e-06
## 2750 BP 5.501403e-05
## 557 BP 1.359781e-04
## 853 BP 1.442912e-04
## 1210 BP 4.562004e-04
## 899 BP 4.562004e-04
## 894 BP 9.083503e-04
## 900 BP 9.548123e-04
## 3438 BP 9.548123e-04
## 2237 BP 2.546702e-03
## 1031 BP 6.219308e-03
## 933 BP 6.219308e-03
## 619 BP 8.498880e-03
## 485 BP 9.563750e-03
## 2986 BP 1.240648e-02
## 2400 BP 1.298810e-02
## 898 BP 1.452157e-02
## 3708 BP 2.034725e-02
## 2034 BP 2.034725e-02
## 972 BP 3.228805e-02
## 477 BP 3.977480e-02
## 932 BP 3.977480e-02
## 185 BP 4.698474e-02
## 646 BP 4.698474e-02
## 1001 BP 4.698474e-02
## 32 BP 4.698474e-02
## 2230 BP 4.959190e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 1.343043e-10 1.0000000 22
## 3164 GO:0071732 1.889538e-10 1.0000000 11
## 3094 GO:0071281 1.613934e-09 1.0000000 12
## 359 GO:0006355 2.926398e-07 1.0000000 76
## 914 GO:0009644 3.813626e-07 1.0000000 11
## 3125 GO:0071456 4.645253e-07 1.0000000 7
## 3114 GO:0071369 2.507765e-06 0.9999998 8
## 638 GO:0006952 4.345368e-06 0.9999982 38
## 1166 GO:0010112 1.355092e-05 0.9999994 5
## 1039 GO:0009873 1.550994e-05 0.9999960 17
## 170 GO:0001944 2.120295e-05 0.9999985 6
## 853 GO:0009407 2.365588e-05 0.9999978 7
## 1919 GO:0032268 2.430717e-05 0.9999998 3
## 854 GO:0009408 3.627089e-05 0.9999909 15
## 3708 GO:2000022 3.991164e-05 0.9999950 8
## 2239 GO:0042542 8.277225e-05 0.9999865 9
## 1270 GO:0010286 8.336564e-05 0.9999883 8
## 646 GO:0006979 1.072674e-04 0.9999647 19
## 2795 GO:0051259 1.232705e-04 0.9999947 4
## 557 GO:0006749 1.465446e-04 0.9999812 7
## 2253 GO:0042742 2.068113e-04 0.9999181 24
## 3117 GO:0071398 2.323324e-04 0.9999950 3
## numInCat term ontology
## 1210 286 response to chitin BP
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## 359 2992 regulation of transcription, DNA-templated BP
## 914 109 response to high light intensity BP
## 3125 35 cellular response to hypoxia BP
## 3114 62 cellular response to ethylene stimulus BP
## 638 1165 defense response BP
## 1166 22 regulation of systemic acquired resistance BP
## 1039 364 ethylene-activated signaling pathway BP
## 170 41 vasculature development BP
## 853 67 toxin catabolic process BP
## 1919 5 regulation of cellular protein metabolic process BP
## 854 305 response to heat BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 2239 130 response to hydrogen peroxide BP
## 1270 99 heat acclimation BP
## 646 502 response to oxidative stress BP
## 2795 22 protein complex oligomerization BP
## 557 85 glutathione metabolic process BP
## 2253 726 defense response to bacterium BP
## 3117 9 cellular response to fatty acid BP
## over_represented_padjust
## 1210 3.579730e-07
## 3164 3.579730e-07
## 3094 2.038399e-06
## 359 2.772030e-04
## 914 2.889966e-04
## 3125 2.933477e-04
## 3114 1.357418e-03
## 638 2.058075e-03
## 1166 5.704937e-03
## 1039 5.876717e-03
## 170 7.084606e-03
## 853 7.084606e-03
## 1919 7.084606e-03
## 854 9.816458e-03
## 3708 1.008168e-02
## 2239 1.858073e-02
## 1270 1.858073e-02
## 646 2.257979e-02
## 2795 2.458274e-02
## 557 2.776287e-02
## 2253 3.731466e-02
## 3117 4.001398e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816 1.135827e-05 0.9999990 7
## 890 GO:0009611 1.193363e-05 0.9999976 13
## 3251 GO:0080142 2.049805e-05 0.9999995 4
## 1205 GO:0010193 3.256854e-05 0.9999982 5
## 187 GO:0002237 5.567136e-05 0.9999952 6
## 638 GO:0006952 6.623245e-05 0.9999771 22
## 1210 GO:0010200 8.592369e-05 0.9999857 9
## 973 GO:0009753 1.046543e-04 0.9999800 10
## numInCat term ontology
## 1001 97 defense response to bacterium, incompatible interaction BP
## 890 419 response to wounding BP
## 3251 19 regulation of salicylic acid biosynthetic process BP
## 1205 54 response to ozone BP
## 187 88 response to molecule of bacterial origin BP
## 638 1165 defense response BP
## 1210 286 response to chitin BP
## 973 338 response to jasmonic acid BP
## over_represented_padjust
## 1001 0.02260826
## 890 0.02260826
## 3251 0.02588904
## 1205 0.03085055
## 187 0.04182579
## 638 0.04182579
## 1210 0.04650927
## 973 0.04956691
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 8.757706e-16 1.0000000 30
## 3708 GO:2000022 7.994984e-11 1.0000000 10
## 423 GO:0006468 5.373869e-10 1.0000000 37
## 2750 GO:0050832 2.165212e-09 1.0000000 17
## 638 GO:0006952 8.977942e-09 1.0000000 28
## 1857 GO:0031347 6.083376e-08 1.0000000 8
## 26 GO:0000103 2.373158e-07 1.0000000 5
## 890 GO:0009611 7.356536e-07 0.9999999 14
## 1210 GO:0010200 3.530396e-06 0.9999996 10
## 1174 GO:0010120 2.304231e-05 0.9999993 4
## 642 GO:0006970 3.054236e-05 0.9999956 9
## 3009 GO:0070417 3.406753e-05 0.9999981 5
## 973 GO:0009753 3.498600e-05 0.9999942 10
## 1183 GO:0010150 8.085242e-05 0.9999885 8
## 1858 GO:0031348 9.938846e-05 0.9999931 5
## 3442 GO:1900067 1.076857e-04 0.9999998 2
## 2223 GO:0042344 1.092568e-04 0.9999979 3
## 972 GO:0009751 1.274416e-04 0.9999777 9
## 2917 GO:0055114 1.933095e-04 0.9999217 26
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 423 1484 protein phosphorylation BP
## 2750 469 defense response to fungus BP
## 638 1165 defense response BP
## 1857 95 regulation of defense response BP
## 26 24 sulfate assimilation BP
## 890 419 response to wounding BP
## 1210 286 response to chitin BP
## 1174 27 camalexin biosynthetic process BP
## 642 252 response to osmotic stress BP
## 3009 54 cellular response to cold BP
## 973 338 response to jasmonic acid BP
## 1183 219 leaf senescence BP
## 1858 64 negative regulation of defense response BP
## 3442 3 regulation of cellular response to alkaline pH BP
## 2223 15 indole glucosinolate catabolic process BP
## 972 347 response to salicylic acid BP
## 2917 1923 oxidation-reduction process BP
## over_represented_padjust
## 2253 3.318295e-12
## 3708 1.514650e-07
## 423 6.787196e-07
## 2750 2.050997e-06
## 638 6.803485e-06
## 1857 3.841652e-05
## 26 1.284556e-04
## 890 3.484239e-04
## 1210 1.486297e-03
## 1174 8.730730e-03
## 642 1.019707e-02
## 3009 1.019707e-02
## 973 1.019707e-02
## 1183 2.188213e-02
## 1858 2.435142e-02
## 3442 2.435142e-02
## 2223 2.435142e-02
## 972 2.682646e-02
## 2917 3.854999e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 5.357720e-16 1.0000000 10
## 1181 GO:0010143 9.648414e-08 1.0000000 5
## 514 GO:0006629 1.779627e-07 1.0000000 9
## 2917 GO:0055114 3.330532e-07 0.9999999 23
## 515 GO:0006631 1.472111e-05 0.9999993 5
## 953 GO:0009725 1.731199e-05 0.9999998 3
## 1531 GO:0016042 2.458469e-05 0.9999976 7
## 2136 GO:0035336 4.497929e-05 0.9999994 3
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## 514 220 lipid metabolic process BP
## 2917 1923 oxidation-reduction process BP
## 515 85 fatty acid metabolic process BP
## 953 14 response to hormone BP
## 1531 229 lipid catabolic process BP
## 2136 17 long-chain fatty-acyl-CoA metabolic process BP
## over_represented_padjust
## 1291 2.030040e-12
## 1181 1.827892e-04
## 514 2.247668e-04
## 2917 3.154847e-04
## 515 1.093252e-02
## 953 1.093252e-02
## 1531 1.330734e-02
## 2136 2.130332e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441 1.046761e-06 1.0000000 3
## 646 GO:0006979 1.940648e-06 0.9999998 10
## 1292 GO:0010350 3.796980e-05 0.9999999 2
## 3096 GO:0071286 3.796980e-05 0.9999999 2
## 3104 GO:0071325 3.796980e-05 0.9999999 2
## 3197 GO:0072709 3.796980e-05 0.9999999 2
## 1838 GO:0031115 6.036162e-05 0.9999999 2
## 736 GO:0007568 8.378585e-05 0.9999965 4
## 1572 GO:0016310 9.069704e-05 0.9999832 10
## 2844 GO:0051592 9.983663e-05 0.9999997 2
## 1840 GO:0031117 1.039244e-04 0.9999996 2
## 3201 GO:0075733 1.069764e-04 0.9999996 2
## 3093 GO:0071280 1.177608e-04 0.9999995 2
## 2162 GO:0035865 1.203759e-04 0.9999995 2
## 1619 GO:0018008 1.219134e-04 0.9999995 2
## 1248 GO:0010248 1.574567e-04 0.9999993 2
## 452 GO:0006520 2.210690e-04 0.9999881 4
## numInCat
## 1695 7
## 646 502
## 1292 4
## 3096 4
## 3104 4
## 3197 4
## 1838 5
## 736 85
## 1572 685
## 2844 6
## 1840 7
## 3201 7
## 3093 7
## 2162 7
## 1619 6
## 1248 6
## 452 90
## term
## 1695 tryptophan catabolic process to kynurenine
## 646 response to oxidative stress
## 1292 cellular response to magnesium starvation
## 3096 cellular response to magnesium ion
## 3104 cellular response to mannitol stimulus
## 3197 cellular response to sorbitol
## 1838 negative regulation of microtubule polymerization
## 736 aging
## 1572 phosphorylation
## 2844 response to calcium ion
## 1840 positive regulation of microtubule depolymerization
## 3201 intracellular transport of virus
## 3093 cellular response to copper ion
## 2162 cellular response to potassium ion
## 1619 N-terminal peptidyl-glycine N-myristoylation
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 452 cellular amino acid metabolic process
## ontology over_represented_padjust
## 1695 BP 0.003676557
## 646 BP 0.003676557
## 1292 BP 0.023977928
## 3096 BP 0.023977928
## 3104 BP 0.023977928
## 3197 BP 0.023977928
## 1838 BP 0.030795317
## 736 BP 0.030795317
## 1572 BP 0.030795317
## 2844 BP 0.030795317
## 1840 BP 0.030795317
## 3201 BP 0.030795317
## 3093 BP 0.030795317
## 2162 BP 0.030795317
## 1619 BP 0.030795317
## 1248 BP 0.037287711
## 452 BP 0.049272379
# Expand the GO_result list-column with unnest() and save the table as CSV
# (author's note: convenient because the target genes stay in the same
# data frame as the GO results).
# The output path is passed positionally on purpose: readr >= 1.4.0 deprecates
# the `path=` argument in favour of `file=`, while older readr only knows
# `path=`, so the unnamed form is the one that runs cleanly on both.
# file.path() mirrors how the input files are located at the top of this file
# and yields the same "../output/..." string.
# NOTE(review): the `transcripts` list-column is still present after unnest();
# how write_csv() serializes a list-column depends on the readr version --
# confirm the CSV contains what is expected.
temp %>%
  unnest(GO_result) %>%
  write_csv(file.path("..", "output", "twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv"))
# K-means with 15 clusters: nest the transcript IDs of every cluster and run
# the GO over-representation helper once per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # each element is the nested one-column tibble of a cluster; pull()
      # extracts that column (transcript_ID) as a plain vector for the helper
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3114 GO:0071369 0.000000e+00 1.0000000 8
## 3164 GO:0071732 0.000000e+00 1.0000000 8
## 3094 GO:0071281 5.887219e-11 1.0000000 8
## 2907 GO:0055072 1.403391e-06 1.0000000 5
## 359 GO:0006355 9.616408e-06 0.9999972 22
## 1195 GO:0010167 1.116999e-05 0.9999997 4
## 613 GO:0006880 1.510212e-05 0.9999998 3
## 1760 GO:0030001 2.151165e-05 0.9999989 5
## 1764 GO:0030026 2.323359e-05 0.9999997 3
## 254 GO:0006096 5.543201e-05 0.9999966 5
## 612 GO:0006879 6.469221e-05 0.9999989 3
## 1114 GO:0010039 7.167439e-05 0.9999987 3
## 1952 GO:0032869 7.691956e-05 0.9999986 3
## 1462 GO:0015760 9.484983e-05 0.9999997 2
## 2487 GO:0045893 1.310402e-04 0.9999802 8
## 1798 GO:0030418 1.891972e-04 0.9999990 2
## 1449 GO:0015714 1.979116e-04 0.9999990 2
## 1438 GO:0015689 2.211728e-04 0.9999988 2
## numInCat term ontology
## 3114 62 cellular response to ethylene stimulus BP
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## 2907 69 iron ion homeostasis BP
## 359 2992 regulation of transcription, DNA-templated BP
## 1195 53 response to nitrate BP
## 613 20 intracellular sequestering of iron ion BP
## 1760 130 metal ion transport BP
## 1764 21 cellular manganese ion homeostasis BP
## 254 138 glycolytic process BP
## 612 29 cellular iron ion homeostasis BP
## 1114 33 response to iron ion BP
## 1952 30 cellular response to insulin stimulus BP
## 1462 5 glucose-6-phosphate transport BP
## 2487 570 positive regulation of transcription, DNA-templated BP
## 1798 8 nicotianamine biosynthetic process BP
## 1449 7 phosphoenolpyruvate transport BP
## 1438 7 molybdate ion transport BP
## over_represented_padjust
## 3114 0.000000e+00
## 3164 0.000000e+00
## 3094 7.435558e-08
## 2907 1.329362e-03
## 359 7.053851e-03
## 1195 7.053851e-03
## 613 8.174561e-03
## 1760 9.781343e-03
## 1764 9.781343e-03
## 254 2.100319e-02
## 612 2.228352e-02
## 1114 2.241909e-02
## 1952 2.241909e-02
## 1462 2.567043e-02
## 2487 3.310074e-02
## 1798 4.411100e-02
## 1449 4.411100e-02
## 1438 4.655688e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120 1.723122e-11 1.0000000 7
## 2253 GO:0042742 6.152850e-08 1.0000000 16
## 2750 GO:0050832 1.527669e-06 0.9999998 11
## 1031 GO:0009863 1.866709e-05 0.9999995 4
## 2230 GO:0042372 3.841027e-05 0.9999995 3
## 900 GO:0009627 5.003514e-05 0.9999970 5
## 894 GO:0009617 6.392564e-05 0.9999927 7
## 975 GO:0009759 6.961346e-05 0.9999988 3
## 855 GO:0009409 7.064222e-05 0.9999859 11
## 2790 GO:0051245 9.537243e-05 0.9999997 2
## 1166 GO:0010112 9.614152e-05 0.9999982 3
## 944 GO:0009697 1.070904e-04 0.9999979 3
## 1210 GO:0010200 1.072639e-04 0.9999867 7
## 3438 GO:1900056 1.520096e-04 0.9999966 3
## 2301 GO:0043069 1.719698e-04 0.9999959 3
## 1858 GO:0031348 1.811769e-04 0.9999908 4
## 557 GO:0006749 1.932567e-04 0.9999899 4
## numInCat term ontology
## 1174 27 camalexin biosynthetic process BP
## 2253 726 defense response to bacterium BP
## 2750 469 defense response to fungus BP
## 1031 38 salicylic acid mediated signaling pathway BP
## 2230 18 phylloquinone biosynthetic process BP
## 900 104 systemic acquired resistance BP
## 894 241 response to bacterium BP
## 975 19 indole glucosinolate biosynthetic process BP
## 855 696 response to cold BP
## 2790 4 negative regulation of cellular defense response BP
## 1166 22 regulation of systemic acquired resistance BP
## 944 21 salicylic acid biosynthetic process BP
## 1210 286 response to chitin BP
## 3438 26 negative regulation of leaf senescence BP
## 2301 30 negative regulation of programmed cell death BP
## 1858 64 negative regulation of defense response BP
## 557 85 glutathione metabolic process BP
## over_represented_padjust
## 1174 6.528911e-08
## 2253 1.165657e-04
## 2750 1.929446e-03
## 1031 1.768240e-02
## 2230 2.910731e-02
## 900 2.974037e-02
## 894 2.974037e-02
## 975 2.974037e-02
## 855 2.974037e-02
## 2790 3.126329e-02
## 1166 3.126329e-02
## 944 3.126329e-02
## 1210 3.126329e-02
## 3438 4.114032e-02
## 2301 4.290496e-02
## 1858 4.290496e-02
## 557 4.307350e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816 9.333650e-07 0.9999999 7
## 3708 GO:2000022 1.751096e-06 0.9999999 6
## 894 GO:0009617 1.952047e-06 0.9999998 9
## 1857 GO:0031347 5.230519e-05 0.9999968 5
## numInCat term ontology
## 1001 97 defense response to bacterium, incompatible interaction BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 894 241 response to bacterium BP
## 1857 95 regulation of defense response BP
## over_represented_padjust
## 1001 0.002465435
## 3708 0.002465435
## 894 0.002465435
## 1857 0.049546090
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 0.000000e+00 1.0000000 9
## 1181 GO:0010143 1.631724e-08 1.0000000 5
## 2917 GO:0055114 2.704739e-07 0.9999999 19
## 514 GO:0006629 2.154168e-06 0.9999999 7
## 515 GO:0006631 2.635121e-06 0.9999999 5
## 1531 GO:0016042 3.108001e-05 0.9999976 6
## 1527 GO:0016024 6.205455e-05 0.9999990 3
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## 2917 1923 oxidation-reduction process BP
## 514 220 lipid metabolic process BP
## 515 85 fatty acid metabolic process BP
## 1531 229 lipid catabolic process BP
## 1527 29 CDP-diacylglycerol biosynthetic process BP
## over_represented_padjust
## 1291 0.000000e+00
## 1181 3.091302e-05
## 2917 3.416085e-04
## 514 1.996895e-03
## 515 1.996895e-03
## 1531 1.962703e-02
## 1527 3.358924e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1530 GO:0016036 3.257160e-06 0.9999999 5
## 3219 GO:0080040 1.654299e-05 1.0000000 2
## numInCat term
## 1530 148 cellular response to phosphate starvation
## 3219 5 positive regulation of cellular response to phosphate starvation
## ontology over_represented_padjust
## 1530 BP 0.01234138
## 3219 BP 0.03134070
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 2.034123e-17 1.0000000 24
## 423 GO:0006468 1.807727e-12 1.0000000 29
## 1210 GO:0010200 7.269714e-08 1.0000000 9
## 972 GO:0009751 4.471802e-07 1.0000000 9
## 2750 GO:0050832 1.489589e-06 0.9999998 10
## 185 GO:0002229 1.667994e-05 0.9999988 6
## 638 GO:0006952 7.042662e-05 0.9999827 14
## 1174 GO:0010120 7.723694e-05 0.9999986 3
## 1858 GO:0031348 9.506968e-05 0.9999959 4
## 3708 GO:2000022 1.129933e-04 0.9999949 4
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 423 1484 protein phosphorylation BP
## 1210 286 response to chitin BP
## 972 347 response to salicylic acid BP
## 2750 469 defense response to fungus BP
## 185 126 defense response to oomycetes BP
## 638 1165 defense response BP
## 1174 27 camalexin biosynthetic process BP
## 1858 64 negative regulation of defense response BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## over_represented_padjust
## 2253 7.707293e-14
## 423 3.424739e-09
## 1210 9.181649e-05
## 972 4.235915e-04
## 2750 1.128811e-03
## 185 1.053338e-02
## 638 3.658134e-02
## 1174 3.658134e-02
## 1858 4.002434e-02
## 3708 4.281315e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1737 GO:0019761 2.032233e-05 0.9999994 4
## 2997 GO:0070179 2.418116e-05 1.0000000 2
## numInCat term ontology
## 1737 69 glucosinolate biosynthetic process BP
## 2997 4 D-serine biosynthetic process BP
## over_represented_padjust
## 1737 0.0458112
## 2997 0.0458112
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 4.498580e-11 1.0000000 19
## 914 GO:0009644 9.502988e-08 1.0000000 10
## 359 GO:0006355 1.299821e-07 1.0000000 58
## 3125 GO:0071456 8.872693e-07 1.0000000 6
## 3708 GO:2000022 2.292534e-06 0.9999998 8
## 2750 GO:0050832 3.085071e-06 0.9999993 17
## 638 GO:0006952 4.156627e-06 0.9999986 29
## 2239 GO:0042542 4.285154e-06 0.9999995 9
## 1919 GO:0032268 8.024532e-06 1.0000000 3
## 854 GO:0009408 8.921277e-06 0.9999983 13
## 58 GO:0000302 3.385541e-05 0.9999966 7
## 853 GO:0009407 3.481444e-05 0.9999973 6
## 2795 GO:0051259 3.977016e-05 0.9999987 4
## 170 GO:0001944 4.197116e-05 0.9999976 5
## 3465 GO:1900457 9.998220e-05 0.9999983 3
## 557 GO:0006749 1.469226e-04 0.9999849 6
## numInCat term ontology
## 1210 286 response to chitin BP
## 914 109 response to high light intensity BP
## 359 2992 regulation of transcription, DNA-templated BP
## 3125 35 cellular response to hypoxia BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 2750 469 defense response to fungus BP
## 638 1165 defense response BP
## 2239 130 response to hydrogen peroxide BP
## 1919 5 regulation of cellular protein metabolic process BP
## 854 305 response to heat BP
## 58 96 response to reactive oxygen species BP
## 853 67 toxin catabolic process BP
## 2795 22 protein complex oligomerization BP
## 170 41 vasculature development BP
## 3465 10 regulation of brassinosteroid mediated signaling pathway BP
## 557 85 glutathione metabolic process BP
## over_represented_padjust
## 1210 1.704512e-07
## 914 1.641674e-04
## 359 1.641674e-04
## 3125 8.404659e-04
## 3708 1.737282e-03
## 2750 1.948222e-03
## 638 2.029556e-03
## 2239 2.029556e-03
## 1919 3.378328e-03
## 854 3.380272e-03
## 58 1.099266e-02
## 853 1.099266e-02
## 2795 1.135919e-02
## 170 1.135919e-02
## 3465 2.525550e-02
## 557 3.479310e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103 9.838045e-10 1 5
## numInCat term ontology over_represented_padjust
## 26 24 sulfate assimilation BP 3.727635e-06
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441 9.629362e-08 1.0000000 3
## 1292 GO:0010350 9.046552e-06 1.0000000 2
## 3096 GO:0071286 9.046552e-06 1.0000000 2
## 3104 GO:0071325 9.046552e-06 1.0000000 2
## 3197 GO:0072709 9.046552e-06 1.0000000 2
## 1838 GO:0031115 1.460646e-05 1.0000000 2
## 859 GO:0009414 1.567843e-05 0.9999986 7
## 2844 GO:0051592 2.317204e-05 1.0000000 2
## 1619 GO:0018008 2.655419e-05 1.0000000 2
## 1840 GO:0031117 2.684154e-05 1.0000000 2
## 3201 GO:0075733 2.734060e-05 0.9999999 2
## 1248 GO:0010248 2.913482e-05 0.9999999 2
## 3093 GO:0071280 2.918774e-05 0.9999999 2
## 2162 GO:0035865 2.961950e-05 0.9999999 2
## 967 GO:0009744 3.347001e-05 0.9999989 4
## 646 GO:0006979 5.215913e-05 0.9999956 6
## 2829 GO:0051511 6.257254e-05 0.9999998 2
## 1282 GO:0010325 1.318124e-04 0.9999994 2
## 736 GO:0007568 1.789968e-04 0.9999956 3
## 3081 GO:0071219 1.826884e-04 0.9999990 2
## 441 GO:0006499 2.422844e-04 0.9999934 3
## numInCat
## 1695 7
## 1292 4
## 3096 4
## 3104 4
## 3197 4
## 1838 5
## 859 596
## 2844 6
## 1619 6
## 1840 7
## 3201 7
## 1248 6
## 3093 7
## 2162 7
## 967 136
## 646 502
## 2829 9
## 1282 11
## 736 85
## 3081 14
## 441 89
## term
## 1695 tryptophan catabolic process to kynurenine
## 1292 cellular response to magnesium starvation
## 3096 cellular response to magnesium ion
## 3104 cellular response to mannitol stimulus
## 3197 cellular response to sorbitol
## 1838 negative regulation of microtubule polymerization
## 859 response to water deprivation
## 2844 response to calcium ion
## 1619 N-terminal peptidyl-glycine N-myristoylation
## 1840 positive regulation of microtubule depolymerization
## 3201 intracellular transport of virus
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 3093 cellular response to copper ion
## 2162 cellular response to potassium ion
## 967 response to sucrose
## 646 response to oxidative stress
## 2829 negative regulation of unidimensional cell growth
## 1282 raffinose family oligosaccharide biosynthetic process
## 736 aging
## 3081 cellular response to molecule of bacterial origin
## 441 N-terminal protein myristoylation
## ontology over_represented_padjust
## 1695 BP 0.0003648565
## 1292 BP 0.0068554769
## 3096 BP 0.0068554769
## 3104 BP 0.0068554769
## 3197 BP 0.0068554769
## 1838 BP 0.0080163051
## 859 BP 0.0080163051
## 2844 BP 0.0080163051
## 1619 BP 0.0080163051
## 1840 BP 0.0080163051
## 3201 BP 0.0080163051
## 1248 BP 0.0080163051
## 3093 BP 0.0080163051
## 2162 BP 0.0080163051
## 967 BP 0.0084545246
## 646 BP 0.0123519337
## 2829 BP 0.0139463151
## 1282 BP 0.0277465042
## 736 BP 0.0346103103
## 3081 BP 0.0346103103
## 441 BP 0.0437150254
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 853 GO:0009407 7.21299e-06 0.9999998 4
## numInCat term ontology over_represented_padjust
## 853 67 toxin catabolic process BP 0.02733002
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1572 GO:0016310 4.546280e-06 0.9999993 12
## 2750 GO:0050832 9.987204e-06 0.9999988 9
## 423 GO:0006468 1.451836e-05 0.9999962 18
## numInCat term ontology over_represented_padjust
## 1572 685 phosphorylation BP 0.01722585
## 2750 469 defense response to fungus BP 0.01833669
## 423 1484 protein phosphorylation BP 0.01833669
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1166 GO:0010112 5.131427e-11 1.0000000 7
## 638 GO:0006952 1.205807e-09 1.0000000 28
## 890 GO:0009611 4.343094e-07 0.9999999 14
## 899 GO:0009626 4.232341e-06 0.9999996 8
## 973 GO:0009753 7.729912e-06 0.9999988 11
## 2253 GO:0042742 1.663149e-05 0.9999959 16
## 1205 GO:0010193 1.870619e-05 0.9999991 5
## 2237 GO:0042538 9.358914e-05 0.9999910 6
## numInCat term ontology
## 1166 22 regulation of systemic acquired resistance BP
## 638 1165 defense response BP
## 890 419 response to wounding BP
## 899 140 plant-type hypersensitive response BP
## 973 338 response to jasmonic acid BP
## 2253 726 defense response to bacterium BP
## 1205 54 response to ozone BP
## 2237 132 hyperosmotic salinity response BP
## over_represented_padjust
## 1166 1.944298e-07
## 638 2.284402e-06
## 890 5.485327e-04
## 899 4.009085e-03
## 973 5.857727e-03
## 2253 1.012539e-02
## 1205 1.012539e-02
## 2237 4.432616e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 946 GO:0009699 5.793939e-06 0.9999999 4
## 890 GO:0009611 6.268525e-06 0.9999993 9
## 602 GO:0006855 7.426144e-06 0.9999996 6
## numInCat term ontology
## 946 34 phenylpropanoid biosynthetic process BP
## 890 419 response to wounding BP
## 602 130 drug transmembrane transport BP
## over_represented_padjust
## 946 0.00937922
## 890 0.00937922
## 602 0.00937922
# Flatten the per-cluster `GO_result` list-column of `temp` (built earlier in
# the document) into one long table and save it as CSV.
# The output path is passed positionally: readr >= 1.4.0 deprecated the `path`
# argument of write_csv() in favour of `file`, and the positional form works
# with both older and newer readr versions.
temp %>%
  unnest(GO_result) %>%
  write_csv("../output/twoafternoon.any.trtsoil.DEG.Kmeans.15cluster.csv")
diurnal34.time.DEGs.all.v3.0anno
# Standardize expression per transcript: scale() works column-wise, so the
# expression matrix (everything except the first, ID, column) is transposed
# before and after scaling. The transcript IDs are re-attached in front.
cpm.timecourse.v3.0.scale <- cpm.timecourse.v3.0[, -1] %>%
  t() %>%
  scale() %>%
  t() %>%
  as_tibble() %>%
  bind_cols(data.frame(transcript_ID = cpm.timecourse.v3.0$transcript_ID[]), .)
# Scaled expression of the diurnal (day 3 and 4) time DEGs (FDR < 0.05) in long
# format (one row per transcript x sample), annotated with the sample
# description and restricted to sampling days "03" and "04".
cpm.timecourse.v3.0.scale.diurnal34.time.DEG <- cpm.timecourse.v3.0.scale %>%
  inner_join(
    diurnal34.time.DEGs.all.v3.0anno %>%
      filter(FDR < 0.05) %>%
      dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_day %in% c("03", "04"))
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# Sanity check: cross-tabulate sampling day against sample to confirm that each
# retained sample belongs to a single day and carries every selected transcript.
table(
  sampling_day = cpm.timecourse.v3.0.scale.diurnal34.time.DEG$sampling_day,
  sample = cpm.timecourse.v3.0.scale.diurnal34.time.DEG$sample
)
## sample
## sampling_day 1a1_q_002_S1_R1_001 1a2_q_003_S2_R1_001 1a4_q_005_S4_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 1a6_q_007_S6_R1_001 1b2_q_013_S10_R1_001 1b4_q_015_S12_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 1b5_q_016_S13_R1_001 1b8_q_022_S16_R1_001 1c1_q_023_S17_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 1c5_q_027_S21_R1_001 1c7_q_031_S23_R1_001 1c8_q_032_S24_R1_001
## 03 0 0 0
## 04 6774 6774 6774
## sample
## sampling_day 1d2_q_037_S26_R1_001 1d6_q_044_S30_R1_001 1e2_q_050_S34_R1_001
## 03 0 6774 6774
## 04 6774 0 0
## sample
## sampling_day 1f2_q_062_S42_R1_001 1f4_q_066_S44_R1_001 1f5_q_068_S45_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 1f7_q_071_S47_R1_001 1f8_q_072_S48_R1_001 1g7_q_082_S55_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 1h5_q_091_S61_R1_001 1h6_q_095_S62_R1_001 1h7_q_096_S63_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 1i1_q_098_S65_R1_001 1i4_q_106_S68_R1_001 1i6_q_108_S70_R1_001
## 03 0 6774 0
## 04 6774 0 6774
## sample
## sampling_day 1i8_q_111_S72_R1_001 1j1_q_112_S73_R1_001 1j2_q_113_S74_R1_001
## 03 0 6774 0
## 04 6774 0 6774
## sample
## sampling_day 1j4_q_115_S76_R1_001 1j5_q_116_S77_R1_001 1j6_q_117_S78_R1_001
## 03 0 6774 6774
## 04 6774 0 0
## sample
## sampling_day 1j8_q_119_S80_R1_001 1k4_q_127_S84_R1_001 1k7_q_134_S87_R1_001
## 03 0 0 0
## 04 6774 6774 6774
## sample
## sampling_day 1l1_q_136_S89_R1_001 1l4_q_139_S92_R1_001 1l7_q_143_S95_R1_001
## 03 6774 6774 6774
## 04 0 0 0
## sample
## sampling_day 1l8_q_144_S96_R1_001 2a1_q_146_S97_R1_001 2a4_q_150_S100_R1_001
## 03 0 6774 0
## 04 6774 0 6774
## sample
## sampling_day 2a5_q_151_S101_R1_001 2a6_q_152_S102_R1_001 2b3_q_160_S107_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 2c7_q_178_S119_R1_001 2c8_q_179_S120_R1_001 2d1_q_180_S121_R1_001
## 03 0 0 0
## 04 6774 6774 6774
## sample
## sampling_day 2d3_q_182_S123_R1_001 2d5_q_184_S125_R1_001 2e2_q_196_S130_R1_001
## 03 0 6774 6774
## 04 6774 0 0
## sample
## sampling_day 2e4_q_199_S132_R1_001 2e5_q_200_S133_R1_001 2e7_q_201_S135_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 2e8_q_203_S136_R1_001 2f2_q_205_S138_R1_001 2f4_q_208_S140_R1_001
## 03 6774 0 0
## 04 0 6774 6774
## sample
## sampling_day 2f6_q_212_S142_R1_001 2f7_q_213_S143_R1_001 2f8_q_216_S144_R1_001
## 03 0 6774 6774
## 04 6774 0 0
## sample
## sampling_day 2g3_q_220_S147_R1_001 2g5_q_226_S149_R1_001 2g7_q_228_S151_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 2h1_q_230_S153_R1_001 2h5_q_236_S157_R1_001 2h8_q_240_S160_R1_001
## 03 6774 6774 6774
## 04 0 0 0
## sample
## sampling_day 2i5_q_247_S165_R1_001 2i8_q_249_S168_R1_001 2j4_q_254_S172_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 2j5_q_255_S173_R1_001 2k3_q_266_S179_R1_001 2k5_q_271_S181_R1_001
## 03 0 6774 0
## 04 6774 0 6774
## sample
## sampling_day 2k6_q_272_S182_R1_001 2k7_q_273_S183_R1_001 2k8_q_275_S184_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 2l1_q_276_S185_R1_001 2l3_q_280_S187_R1_001 2l4_q_282_S188_R1_001
## 03 0 6774 0
## 04 6774 0 6774
## sample
## sampling_day 2l5_q_285_S189_R1_001 2l6_q_286_S190_R1_001 3a2_q_292_S194_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 3a4_q_294_S196_R1_001 3a6_q_296_S198_R1_001 3b2_q_301_S202_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 3b7_q_307_S207_R1_001 3b8_q_308_S208_R1_001 3c1_q_311_S209_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 3c3_q_314_S211_R1_001 3c4_q_315_S212_R1_001 3c5_q_317_S213_R1_001
## 03 6774 0 6774
## 04 0 6774 0
## sample
## sampling_day 3c6_q_318_S214_R1_001 3d5_q_330_S221_R1_001 3d7_q_334_S223_R1_001
## 03 6774 6774 0
## 04 0 0 6774
## sample
## sampling_day 3d8_q_336_S224_R1_001 3e2_q_342_S226_R1_001 3e3_q_343_S227_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 3e4_q_344_S228_R1_001 3e6_q_350_S230_R1_001 3f1_q_353_S233_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 3f3_q_354_S235_R1_001 3g2_q_364_S242_R1_001 3g4_q_366_S244_R1_001
## 03 0 0 6774
## 04 6774 6774 0
## sample
## sampling_day 3g5_q_367_S245_R1_001 3h7_q_384_S255_R1_001 3i1_q_388_S257_R1_001
## 03 0 0 0
## 04 6774 6774 6774
## sample
## sampling_day 3i2_q_389_S258_R1_001 3i3_q_391_S259_R1_001 3i6_q_395_S262_R1_001
## 03 6774 6774 6774
## 04 0 0 0
## sample
## sampling_day 3i7_q_396_S263_R1_001 3j3_q_401_S267_R1_001 3j4_q_403_S268_R1_001
## 03 6774 0 0
## 04 0 6774 6774
## sample
## sampling_day 3j6_q_407_S270_R1_001 3j7_q_409_S271_R1_001 3k1_q_411_S273_R1_001
## 03 6774 6774 0
## 04 0 0 6774
## sample
## sampling_day 3k5_q_415_S277_R1_001 3l2_q_423_S282_R1_001 3l3_q_424_S283_R1_001
## 03 0 6774 6774
## 04 6774 0 0
## sample
## sampling_day 3l4_q_426_S284_R1_001 3l5_q_427_S285_R1_001 3l7_q_429_S287_R1_001
## 03 6774 0 0
## 04 0 6774 6774
# Reshape to wide format: one row per transcript, one column per sample.
# The earlier call was spread(sample, value, -1). The third positional argument
# of spread() is `fill`, so any missing or NA scaled value would silently have
# been replaced by -1 before clustering. It is dropped here so that missing
# values stay NA and are noticed rather than masked.
cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread <-
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
# rows = transcripts; columns = transcript_ID + one column per sample
dim(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread)
## [1] 6774 121
# Elbow plot input: total within-cluster sum of squares (WSS) for K = 1..30.
# K = 1 is obtained directly as (n - 1) * sum of the per-sample variances.
wss <- numeric(30)
wss[1] <- (nrow(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread) - 1) *
  sum(vapply(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1], var, numeric(1)))
# iter.max is raised to 20 because the default (10) produced
# "did not converge in 10 iterations". Solution taken from
# https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
for (i in 2:30) {
  wss[i] <- sum(
    kmeans(
      cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
      centers = i,
      iter.max = 20
    )$withinss
  )
}
plot(
  seq_len(30), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
Let’s perform the actual clustering using K=6:
# Fix the RNG seed so the random starts are reproducible, then fit K-means
# with 6 centers on the sample columns (first column is the transcript ID).
set.seed(20)
kClust.diurnal34.time.6 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  centers = 6,
  nstart = 1000,
  iter.max = 20
)
kClusters.diurnal34.time.6 <- kClust.diurnal34.time.6[["cluster"]]
# Number of transcripts assigned to each cluster; the cluster label is
# converted to character.
cluster.diurnal34.time.6.num <- tibble(cluster = kClusters.diurnal34.time.6) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal34.time.6.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
# Centroid profile of every cluster, one column per cluster label, computed
# with clust.centroid() (defined outside this section) from the sample columns.
kClustcentroids.diurnal34.time.6 <- sapply(
  levels(factor(kClusters.diurnal34.time.6)),
  function(cl) {
    clust.centroid(
      cl,
      cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
      kClusters.diurnal34.time.6
    )
  }
)
head(kClustcentroids.diurnal34.time.6)
## 1 2 3 4 5
## 1a1_q_002_S1_R1_001 -0.08766409 -0.28375827 0.3043848 -0.46588371 0.21342401
## 1a2_q_003_S2_R1_001 -0.88131939 0.05119095 -0.2101213 0.07575289 0.06628338
## 1a4_q_005_S4_R1_001 -0.55614078 0.07581021 -0.2089740 -0.30749340 0.02435838
## 1a6_q_007_S6_R1_001 -0.23204612 -0.19617309 -0.2666684 0.31211120 0.72286399
## 1b2_q_013_S10_R1_001 -0.10032508 -0.17246169 0.4241383 1.28913194 -0.22397835
## 1b4_q_015_S12_R1_001 -0.13558966 -0.22616853 0.3939676 1.20510364 -0.31804261
## 6
## 1a1_q_002_S1_R1_001 -0.266144832
## 1a2_q_003_S2_R1_001 0.747608834
## 1a4_q_005_S4_R1_001 1.090735726
## 1a6_q_007_S6_R1_001 -0.004685789
## 1b2_q_013_S10_R1_001 0.131912617
## 1b4_q_015_S12_R1_001 0.279213553
# Long-format centroid table (one row per sample x cluster) annotated with the
# sample description and the size of each cluster.
data.sample <- kClustcentroids.diurnal34.time.6 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = "cluster", value = "value", -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal34.time.6.num, by = "cluster") %>%
  # facet label: cluster id followed by the number of member transcripts
  mutate(
    cluster.n = glue::glue(
      '{cluster2} \n({n2})',
      cluster2 = cluster,
      n2 = n
    )
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, used to draw the per-cluster trend line. Each combination is
# reduced to one row that carries the plotting metadata of its first member
# (so sampling_day is simply that first row's day).
# De-duplication previously used slice(rep(1:1800)[!duplicated(...)]), which
# hard-codes a row count (apparently 15 clusters x 120 samples) and only works
# for other table sizes through index recycling. Filtering on !duplicated()
# keeps the same first rows for any number of rows.
data.group <- data.sample %>%
  unite(
    "sampling_time.soil.cluster",
    c("sampling_time", "soil_trt", "cluster"),
    remove = FALSE
  ) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite(
        "sampling_time.soil.cluster",
        c("sampling_time", "soil_trt", "cluster"),
        remove = FALSE
      ) %>%
      dplyr::select(
        "sampling_time.soil.cluster", "sampling_time", "sampling_day",
        "soil_trt", "cluster.n", "cluster"
      ),
    by = "sampling_time.soil.cluster"
  ) %>%
  filter(!duplicated(sampling_time.soil.cluster))
# plot: centroid value per sample (jittered points) with the group mean drawn
# as a line; one facet row per cluster and one facet column per sampling time.
p6.diurnal34.time <- ggplot(
  data.sample,
  aes(
    x = soil_trt, y = value, group = cluster,
    colour = as.factor(cluster), shape = sampling_day
  )
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 3 and 4): six clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p6.diurnal34.time
# `filename` and `plot` are spelled out; the earlier call relied on `file=`
# partially matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/diurnal34.time.DEG.Kmean.6clusters.png",
  plot = p6.diurnal34.time,
  width = 11,
  height = 15
)
Let’s perform the actual clustering using K=15:
# Fix the RNG seed so the random starts are reproducible, then fit K-means
# with 15 centers on the sample columns (first column is the transcript ID).
set.seed(20)
kClust.diurnal34.time.15 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  centers = 15,
  nstart = 1000,
  iter.max = 20
)
kClusters.diurnal34.time.15 <- kClust.diurnal34.time.15[["cluster"]]
# Number of transcripts assigned to each cluster; the cluster label is
# converted to character.
cluster.diurnal34.time.15.num <- tibble(cluster = kClusters.diurnal34.time.15) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal34.time.15.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
# Centroid profile of every cluster, one column per cluster label, computed
# with clust.centroid() (defined outside this section) from the sample columns.
kClustcentroids.diurnal34.time.15 <- sapply(
  levels(factor(kClusters.diurnal34.time.15)),
  function(cl) {
    clust.centroid(
      cl,
      cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
      kClusters.diurnal34.time.15
    )
  }
)
head(kClustcentroids.diurnal34.time.15)
## 1 2 3 4 5
## 1a1_q_002_S1_R1_001 0.2885866 0.2378342 -0.6047991 -0.5744638 -0.25382905
## 1a2_q_003_S2_R1_001 -0.1328990 -0.7039343 -0.2884263 -0.6839978 0.82912505
## 1a4_q_005_S4_R1_001 -0.4752669 -0.4612446 -0.5266081 1.0424779 1.11326429
## 1a6_q_007_S6_R1_001 -0.6306024 0.2121359 0.5424499 0.1902676 0.03288627
## 1b2_q_013_S10_R1_001 0.4841676 -0.1385557 1.4998500 0.6134886 -0.28789826
## 1b4_q_015_S12_R1_001 0.3289299 -0.2487495 1.3233530 0.6989989 -0.10277480
## 6 7 8 9 10
## 1a1_q_002_S1_R1_001 0.4128140 0.3867571 -0.1960882 -0.37242665 -0.04356712
## 1a2_q_003_S2_R1_001 -0.7206098 -0.5591857 0.9373677 -0.87871570 -0.03295028
## 1a4_q_005_S4_R1_001 -0.2200417 -0.3096520 0.6308335 -0.60801022 -0.07359736
## 1a6_q_007_S6_R1_001 -0.4042662 0.5961622 -0.4481958 -0.49284345 -0.08888074
## 1b2_q_013_S10_R1_001 0.1238924 -0.3058549 0.6559920 -0.07016679 -0.01662834
## 1b4_q_015_S12_R1_001 0.2015720 -0.2612193 0.8432035 -0.10168012 -0.01782574
## 11 12 13 14 15
## 1a1_q_002_S1_R1_001 0.30119260 -0.52449659 0.3969915 -0.17131547 -0.34740533
## 1a2_q_003_S2_R1_001 -0.03425461 0.09808085 0.1716039 1.30592230 -0.20882930
## 1a4_q_005_S4_R1_001 0.03116806 0.18471973 -0.3252010 0.76223845 -0.03195129
## 1a6_q_007_S6_R1_001 0.86162058 -0.51069396 0.3343381 0.37483605 0.42372214
## 1b2_q_013_S10_R1_001 -0.26838510 -0.31092742 0.7993373 -0.02639449 -0.44474854
## 1b4_q_015_S12_R1_001 -0.39770479 -0.42012854 0.7927001 -0.11193483 -0.54695574
# Long-format centroid table (one row per sample x cluster) annotated with the
# sample description and the size of each cluster.
data.sample <- kClustcentroids.diurnal34.time.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = "cluster", value = "value", -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal34.time.15.num, by = "cluster") %>%
  # facet label: cluster id followed by the number of member transcripts
  mutate(
    cluster.n = glue::glue(
      '{cluster2} \n({n2})',
      cluster2 = cluster,
      n2 = n
    )
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, used to draw the per-cluster trend line. Each combination is
# reduced to one row that carries the plotting metadata of its first member
# (so sampling_day is simply that first row's day).
# De-duplication previously used slice(rep(1:1800)[!duplicated(...)]), which
# hard-codes the expected row count. Filtering on !duplicated() keeps the same
# first rows without depending on the table size.
data.group <- data.sample %>%
  unite(
    "sampling_time.soil.cluster",
    c("sampling_time", "soil_trt", "cluster"),
    remove = FALSE
  ) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite(
        "sampling_time.soil.cluster",
        c("sampling_time", "soil_trt", "cluster"),
        remove = FALSE
      ) %>%
      dplyr::select(
        "sampling_time.soil.cluster", "sampling_time", "sampling_day",
        "soil_trt", "cluster.n", "cluster"
      ),
    by = "sampling_time.soil.cluster"
  ) %>%
  filter(!duplicated(sampling_time.soil.cluster))
# plot: centroid value per sample (jittered points) with the group mean drawn
# as a line; one facet row per cluster and one facet column per sampling time.
p15.diurnal34.time <- ggplot(
  data.sample,
  aes(
    x = soil_trt, y = value, group = cluster,
    colour = as.factor(cluster), shape = sampling_day
  )
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 3 and 4): fifteen clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p15.diurnal34.time
# `filename` and `plot` are spelled out; the earlier call relied on `file=`
# partially matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/diurnal34.time.DEG.Kmean.15clusters.png",
  plot = p15.diurnal34.time,
  width = 11,
  height = 15
)
diurnal1314.time.DEGs.all.v3.0anno
# Standardize expression per transcript: scale() works column-wise, so the
# expression matrix (everything except the first, ID, column) is transposed
# before and after scaling. The transcript IDs are re-attached in front.
cpm.timecourse.v3.0.scale <- cpm.timecourse.v3.0[, -1] %>%
  t() %>%
  scale() %>%
  t() %>%
  as_tibble() %>%
  bind_cols(data.frame(transcript_ID = cpm.timecourse.v3.0$transcript_ID[]), .)
# Scaled expression of the diurnal (day 13 and 14) time DEGs (FDR < 0.05) in
# long format (one row per transcript x sample), annotated with the sample
# description and restricted to sampling days "13" and "14".
cpm.timecourse.v3.0.scale.diurnal1314.time.DEG <- cpm.timecourse.v3.0.scale %>%
  inner_join(
    diurnal1314.time.DEGs.all.v3.0anno %>%
      filter(FDR < 0.05) %>%
      dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_day %in% c("13", "14"))
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# Sanity check: cross-tabulate sampling day against sample to confirm that each
# retained sample belongs to a single day and carries every selected transcript.
table(
  sampling_day = cpm.timecourse.v3.0.scale.diurnal1314.time.DEG$sampling_day,
  sample = cpm.timecourse.v3.0.scale.diurnal1314.time.DEG$sample
)
## sample
## sampling_day 1a5_q_006_S5_R1_001 1b1_q_012_S9_R1_001 1b3_q_014_S11_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 1b6_q_017_S14_R1_001 1b7_q_020_S15_R1_001 1c2_q_024_S18_R1_001
## 13 0 11886 0
## 14 11886 0 11886
## sample
## sampling_day 1c3_q_025_S19_R1_001 1c4_q_026_S20_R1_001 1c6_q_028_S22_R1_001
## 13 0 0 0
## 14 11886 11886 11886
## sample
## sampling_day 1d1_q_035_S25_R1_001 1d4_q_040_S28_R1_001 1d5_q_042_S29_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 1d7_q_045_S31_R1_001 1d8_q_046_S32_R1_001 1e1_q_048_S33_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 1e3_q_053_S35_R1_001 1e4_q_055_S36_R1_001 1e7_q_058_S39_R1_001
## 13 11886 0 0
## 14 0 11886 11886
## sample
## sampling_day 1f1_q_060_S41_R1_001 1f6_q_070_S46_R1_001 1g1_q_073_S49_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 1g2_q_074_S50_R1_001 1g4_q_077_S52_R1_001 1g5_q_080_S53_R1_001
## 13 11886 11886 11886
## 14 0 0 0
## sample
## sampling_day 1g6_q_081_S54_R1_001 1g8_q_083_S56_R1_001 1h1_q_084_S57_R1_001
## 13 0 0 0
## 14 11886 11886 11886
## sample
## sampling_day 1h2_q_085_S58_R1_001 1h8_q_097_S64_R1_001 1i3_q_105_S67_R1_001
## 13 11886 0 0
## 14 0 11886 11886
## sample
## sampling_day 1i5_q_107_S69_R1_001 1i7_q_110_S71_R1_001 1j3_q_114_S75_R1_001
## 13 0 0 11886
## 14 11886 11886 0
## sample
## sampling_day 1k1_q_120_S81_R1_001 1k3_q_123_S83_R1_001 1k5_q_128_S85_R1_001
## 13 11886 11886 11886
## 14 0 0 0
## sample
## sampling_day 1l2_q_137_S90_R1_001 1l3_q_138_S91_R1_001 1l5_q_141_S93_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 1l6_q_142_S94_R1_001 2a2_q_147_S98_R1_001 2a3_q_148_S99_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 2a8_q_154_S104_R1_001 2b1_q_156_S105_R1_001 2b2_q_158_S106_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 2b4_q_161_S108_R1_001 2b5_q_162_S109_R1_001 2b6_q_164_S110_R1_001
## 13 0 11886 0
## 14 11886 0 11886
## sample
## sampling_day 2c1_q_168_S113_R1_001 2c2_q_169_S114_R1_001 2c5_q_173_S117_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 2c6_q_175_S118_R1_001 2d4_q_183_S124_R1_001 2d6_q_185_S126_R1_001
## 13 0 0 0
## 14 11886 11886 11886
## sample
## sampling_day 2d8_q_190_S128_R1_001 2e1_q_193_S129_R1_001 2e3_q_198_S131_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 2f1_q_204_S137_R1_001 2f3_q_206_S139_R1_001 2f5_q_211_S141_R1_001
## 13 0 11886 0
## 14 11886 0 11886
## sample
## sampling_day 2g2_q_219_S146_R1_001 2g6_q_227_S150_R1_001 2g8_q_229_S152_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 2h2_q_231_S154_R1_001 2h3_q_232_S155_R1_001 2h6_q_237_S158_R1_001
## 13 0 11886 0
## 14 11886 0 11886
## sample
## sampling_day 2h7_q_238_S159_R1_001 2i2_q_243_S162_R1_001 2i3_q_245_S163_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 2i4_q_246_S164_R1_001 2i7_q_248_S167_R1_001 2j2_q_252_S170_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 2j3_q_253_S171_R1_001 2j8_q_261_S176_R1_001 2k1_q_263_S177_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 2k2_q_265_S178_R1_001 2k4_q_267_S180_R1_001 2l2_q_278_S186_R1_001
## 13 0 0 11886
## 14 11886 11886 0
## sample
## sampling_day 2l7_q_287_S191_R1_001 2l8_q_288_S192_R1_001 3a3_q_293_S195_R1_001
## 13 0 0 11886
## 14 11886 11886 0
## sample
## sampling_day 3a5_q_295_S197_R1_001 3a7_q_297_S199_R1_001 3a8_q_299_S200_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 3b1_q_300_S201_R1_001 3b3_q_302_S203_R1_001 3b4_q_303_S204_R1_001
## 13 0 0 0
## 14 11886 11886 11886
## sample
## sampling_day 3b6_q_306_S206_R1_001 3c2_q_312_S210_R1_001 3c8_q_323_S216_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 3d1_q_324_S217_R1_001 3d2_q_325_S218_R1_001 3d3_q_326_S219_R1_001
## 13 0 11886 11886
## 14 11886 0 0
## sample
## sampling_day 3d4_q_329_S220_R1_001 3e1_q_339_S225_R1_001 3e5_q_348_S229_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 3e7_q_351_S231_R1_001 3e8_q_352_S232_R1_001 3f5_q_358_S237_R1_001
## 13 0 11886 0
## 14 11886 0 11886
## sample
## sampling_day 3f6_q_359_S238_R1_001 3g1_q_362_S241_R1_001 3g3_q_365_S243_R1_001
## 13 0 0 0
## 14 11886 11886 11886
## sample
## sampling_day 3g6_q_369_S246_R1_001 3g7_q_370_S247_R1_001 3g8_q_371_S248_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 3h1_q_372_S249_R1_001 3h4_q_376_S252_R1_001 3h6_q_378_S254_R1_001
## 13 11886 0 11886
## 14 0 11886 0
## sample
## sampling_day 3i5_q_393_S261_R1_001 3i8_q_397_S264_R1_001 3j1_q_398_S265_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 3j2_q_399_S266_R1_001 3j5_q_405_S269_R1_001 3j8_q_410_S272_R1_001
## 13 0 0 11886
## 14 11886 11886 0
## sample
## sampling_day 3k2_q_412_S274_R1_001 3k3_q_413_S275_R1_001 3k4_q_414_S276_R1_001
## 13 11886 11886 0
## 14 0 0 11886
## sample
## sampling_day 3k8_q_420_S280_R1_001 3l6_q_428_S286_R1_001 3l8_q_432_S288_R1_001
## 13 0 11886 0
## 14 11886 0 11886
# Reshape to wide format: one row per transcript, one column per sample.
# The earlier call was spread(sample, value, -1). The third positional argument
# of spread() is `fill`, so any missing or NA scaled value would silently have
# been replaced by -1 before clustering. It is dropped here so that missing
# values stay NA and are noticed rather than masked.
cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread <-
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
# rows = transcripts; columns = transcript_ID + one column per sample
# (the rendered run reported 11886 x 121)
dim(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread)
## [1] 11886 121
# Elbow plot input: total within-cluster sum of squares (WSS) for K = 1..30.
# K = 1 is obtained directly as (n - 1) * sum of the per-sample variances.
wss <- numeric(30)
wss[1] <- (nrow(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread) - 1) *
  sum(vapply(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1], var, numeric(1)))
# iter.max is raised to 20 because the default (10) produced
# "did not converge in 10 iterations". Solution taken from
# https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
for (i in 2:30) {
  wss[i] <- sum(
    kmeans(
      cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
      centers = i,
      iter.max = 20
    )$withinss
  )
}
plot(
  seq_len(30), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
Let’s perform the actual clustering using K=6:
# Fix the RNG seed so the random starts are reproducible, then fit K-means
# with 6 centers on the sample columns (first column is the transcript ID).
# NOTE(review): the rendered run printed "Quick-TRANSfer stage steps exceeded
# maximum" warnings for this call - worth confirming the fit is acceptable.
set.seed(20)
kClust.diurnal1314.time.6 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  centers = 6,
  nstart = 1000,
  iter.max = 20
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
# Cluster assignment of every transcript from the K = 6 fit.
kClusters.diurnal1314.time.6 <- kClust.diurnal1314.time.6[["cluster"]]
# Number of transcripts assigned to each cluster; the cluster label is
# converted to character.
cluster.diurnal1314.time.6.num <- tibble(cluster = kClusters.diurnal1314.time.6) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal1314.time.6.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
# Centroid profile of every cluster, one column per cluster label, computed
# with clust.centroid() (defined outside this section) from the sample columns.
kClustcentroids.diurnal1314.time.6 <- sapply(
  levels(factor(kClusters.diurnal1314.time.6)),
  function(cl) {
    clust.centroid(
      cl,
      cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
      kClusters.diurnal1314.time.6
    )
  }
)
head(kClustcentroids.diurnal1314.time.6)
## 1 2 3 4 5
## 1a5_q_006_S5_R1_001 1.00607771 -0.38790092 -0.31463890 -0.26761449 0.5250045
## 1b1_q_012_S9_R1_001 0.34805286 -0.27211138 0.06583565 0.24194275 0.3304202
## 1b3_q_014_S11_R1_001 0.96268730 -0.42494557 -0.56290977 0.36674916 0.5140436
## 1b6_q_017_S14_R1_001 -0.62960914 -0.58599384 0.80073093 -0.11650377 0.9495071
## 1b7_q_020_S15_R1_001 0.99198831 -0.30439734 -0.63553067 0.09323508 0.4032669
## 1c2_q_024_S18_R1_001 -0.01010596 0.03600731 0.06221196 0.36154325 -0.2586159
## 6
## 1a5_q_006_S5_R1_001 0.127231417
## 1b1_q_012_S9_R1_001 -0.140044368
## 1b3_q_014_S11_R1_001 0.001924006
## 1b6_q_017_S14_R1_001 -0.166159823
## 1b7_q_020_S15_R1_001 -0.054333075
## 1c2_q_024_S18_R1_001 -0.277725538
# Long-format centroid table (one row per sample x cluster) annotated with the
# sample description and the size of each cluster.
data.sample <- kClustcentroids.diurnal1314.time.6 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = "cluster", value = "value", -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal1314.time.6.num, by = "cluster") %>%
  # facet label: cluster id followed by the number of member transcripts
  mutate(
    cluster.n = glue::glue(
      '{cluster2} \n({n2})',
      cluster2 = cluster,
      n2 = n
    )
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, used to draw the per-cluster trend line. Each combination is
# reduced to one row that carries the plotting metadata of its first member
# (so sampling_day is simply that first row's day).
# De-duplication previously used slice(rep(1:1800)[!duplicated(...)]), which
# hard-codes a row count (apparently 15 clusters x 120 samples) and only works
# for other table sizes through index recycling. Filtering on !duplicated()
# keeps the same first rows for any number of rows.
data.group <- data.sample %>%
  unite(
    "sampling_time.soil.cluster",
    c("sampling_time", "soil_trt", "cluster"),
    remove = FALSE
  ) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite(
        "sampling_time.soil.cluster",
        c("sampling_time", "soil_trt", "cluster"),
        remove = FALSE
      ) %>%
      dplyr::select(
        "sampling_time.soil.cluster", "sampling_time", "sampling_day",
        "soil_trt", "cluster.n", "cluster"
      ),
    by = "sampling_time.soil.cluster"
  ) %>%
  filter(!duplicated(sampling_time.soil.cluster))
# plot: centroid value per sample (jittered points) with the group mean drawn
# as a line; one facet row per cluster and one facet column per sampling time.
# The title now names days 13 and 14; it previously said "day 3 and 4", copied
# from the day 3/4 section, although the data are filtered to days 13 and 14.
p6.diurnal1314.time <- ggplot(
  data.sample,
  aes(
    x = soil_trt, y = value, group = cluster,
    colour = as.factor(cluster), shape = sampling_day
  )
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 13 and 14): six clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p6.diurnal1314.time
# `filename` and `plot` are spelled out; the earlier call relied on `file=`
# partially matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/diurnal1314.time.DEG.Kmean.6clusters.png",
  plot = p6.diurnal1314.time,
  width = 11,
  height = 15
)
Let’s perform the actual clustering using K=15:
# Fix the RNG seed so the random starts are reproducible, then fit K-means
# with 15 centers on the sample columns (first column is the transcript ID).
# NOTE(review): the rendered run printed a "Quick-TRANSfer stage steps exceeded
# maximum" warning for this call - worth confirming the fit is acceptable.
set.seed(20)
kClust.diurnal1314.time.15 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  centers = 15,
  nstart = 1000,
  iter.max = 20
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
# Cluster assignment of every transcript from the K = 15 fit.
kClusters.diurnal1314.time.15 <- kClust.diurnal1314.time.15[["cluster"]]
# Number of transcripts assigned to each cluster; the cluster label is
# converted to character.
cluster.diurnal1314.time.15.num <- tibble(cluster = kClusters.diurnal1314.time.15) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal1314.time.15.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
# Centroid profile of every cluster, one column per cluster label, computed
# with clust.centroid() (defined outside this section) from the sample columns.
kClustcentroids.diurnal1314.time.15 <- sapply(
  levels(factor(kClusters.diurnal1314.time.15)),
  function(cl) {
    clust.centroid(
      cl,
      cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
      kClusters.diurnal1314.time.15
    )
  }
)
head(kClustcentroids.diurnal1314.time.15)
## 1 2 3 4 5
## 1a5_q_006_S5_R1_001 0.90822348 0.7193382 0.78837550 0.3546389 -0.2685805
## 1b1_q_012_S9_R1_001 0.25532580 0.2161972 1.05435667 -0.1085664 -0.5004343
## 1b3_q_014_S11_R1_001 1.37268253 0.2996500 0.03584396 0.1611360 -0.2226214
## 1b6_q_017_S14_R1_001 -0.63972085 0.2377625 0.15627584 -0.9651293 -0.4043945
## 1b7_q_020_S15_R1_001 0.98105946 0.1122274 0.63255285 0.3856952 -0.4594923
## 1c2_q_024_S18_R1_001 0.05140275 -0.5287403 0.30530714 -0.1455849 -0.5466622
## 6 7 8 9 10
## 1a5_q_006_S5_R1_001 0.66642092 -0.7975201 -0.31462479 -0.1276376 -0.4146333
## 1b1_q_012_S9_R1_001 0.14143216 -0.5234203 0.23789419 0.5367506 0.2948521
## 1b3_q_014_S11_R1_001 0.07170803 -0.7828654 0.07774152 0.6893292 -0.7533285
## 1b6_q_017_S14_R1_001 -0.29193113 -0.6605074 -0.44737701 0.2126839 0.9142712
## 1b7_q_020_S15_R1_001 0.41865948 -0.6796341 0.12426110 0.2872740 -0.7746740
## 1c2_q_024_S18_R1_001 0.03680482 0.1006786 0.32841856 0.2284217 0.3742139
## 11 12 13 14 15
## 1a5_q_006_S5_R1_001 0.50952939 1.3870932 -0.002542094 0.006688155 -0.5537269
## 1b1_q_012_S9_R1_001 0.22195302 0.9202516 -0.116463977 -0.133184823 -0.5670371
## 1b3_q_014_S11_R1_001 1.00248063 -0.1448134 -0.072121978 0.056775228 0.5000807
## 1b6_q_017_S14_R1_001 1.44055235 0.1676767 -0.169097545 0.732294749 0.4968466
## 1b7_q_020_S15_R1_001 0.51335099 1.1859876 -0.067612254 -0.188951246 -0.6213241
## 1c2_q_024_S18_R1_001 0.02784435 -0.6198037 -0.105822254 -0.503023534 0.4579171
# Long-format centroid table (one row per sample x cluster) annotated with the
# sample description and the size of each cluster.
data.sample <- kClustcentroids.diurnal1314.time.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = "cluster", value = "value", -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal1314.time.15.num, by = "cluster") %>%
  # facet label: cluster id followed by the number of member transcripts
  mutate(
    cluster.n = glue::glue(
      '{cluster2} \n({n2})',
      cluster2 = cluster,
      n2 = n
    )
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, used to draw the per-cluster trend line. Each combination is
# reduced to one row that carries the plotting metadata of its first member
# (so sampling_day is simply that first row's day).
# De-duplication previously used slice(rep(1:1800)[!duplicated(...)]), which
# hard-codes the expected row count. Filtering on !duplicated() keeps the same
# first rows without depending on the table size.
data.group <- data.sample %>%
  unite(
    "sampling_time.soil.cluster",
    c("sampling_time", "soil_trt", "cluster"),
    remove = FALSE
  ) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite(
        "sampling_time.soil.cluster",
        c("sampling_time", "soil_trt", "cluster"),
        remove = FALSE
      ) %>%
      dplyr::select(
        "sampling_time.soil.cluster", "sampling_time", "sampling_day",
        "soil_trt", "cluster.n", "cluster"
      ),
    by = "sampling_time.soil.cluster"
  ) %>%
  filter(!duplicated(sampling_time.soil.cluster))
# plot: centroid value per sample (jittered points) with the group mean drawn
# as a line; one facet row per cluster and one facet column per sampling time.
# The title now names days 13 and 14; it previously said "day 3 and 4", copied
# from the day 3/4 section, although the data are filtered to days 13 and 14.
p15.diurnal1314.time <- ggplot(
  data.sample,
  aes(
    x = soil_trt, y = value, group = cluster,
    colour = as.factor(cluster), shape = sampling_day
  )
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 13 and 14): fifteen clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p15.diurnal1314.time
# `filename` and `plot` are spelled out; the earlier call relied on `file=`
# partially matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/diurnal1314.time.DEG.Kmean.15clusters.png",
  plot = p15.diurnal1314.time,
  width = 11,
  height = 15
)
# GO over-representation analysis for each of the six K-means clusters of the
# diurnal (day 3 and 4) DEGs: transcript IDs are nested per cluster and each
# set is passed to GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() (defined outside this
# section). The results are stored in the list-column `GO_result`.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal34.time.6
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 960 GO:0009737 6.904754e-09 1.0000000 53
## 2239 GO:0042542 1.361915e-07 1.0000000 16
## 859 GO:0009414 1.422210e-07 1.0000000 40
## 921 GO:0009651 3.073264e-07 1.0000000 57
## 2795 GO:0051259 1.204131e-06 1.0000000 6
## 359 GO:0006355 2.052973e-06 0.9999990 120
## 854 GO:0009408 8.029617e-06 0.9999975 23
## 3749 GO:2000377 2.200039e-05 0.9999976 8
## numInCat term ontology
## 960 832 response to abscisic acid BP
## 2239 130 response to hydrogen peroxide BP
## 859 596 response to water deprivation BP
## 921 1045 response to salt stress BP
## 2795 22 protein complex oligomerization BP
## 359 2992 regulation of transcription, DNA-templated BP
## 854 305 response to heat BP
## 3749 41 regulation of reactive oxygen species metabolic process BP
## over_represented_padjust
## 960 2.616211e-05
## 2239 1.796251e-04
## 859 1.796251e-04
## 921 2.911150e-04
## 2795 9.124907e-04
## 359 1.296452e-03
## 854 4.346317e-03
## 3749 1.041993e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 359 GO:0006355 3.068091e-13 1.0000000 202
## 3270 GO:0080167 6.288627e-08 1.0000000 31
## 2448 GO:0045490 8.427229e-06 0.9999974 23
## 1737 GO:0019761 1.003628e-05 0.9999982 13
## 1052 GO:0009909 3.199823e-05 0.9999907 19
## 2688 GO:0048573 4.684771e-05 0.9999899 13
## 1047 GO:0009901 7.092009e-05 0.9999895 9
## 918 GO:0009648 7.106530e-05 0.9999945 6
## 3086 GO:0071249 7.771098e-05 0.9999903 8
## 2973 GO:0061408 1.143176e-04 0.9999817 9
## 898 GO:0009625 1.328304e-04 0.9999701 12
## 956 GO:0009733 1.618924e-04 0.9999190 42
## 2908 GO:0055073 1.956185e-04 0.9999973 3
## 3238 GO:0080112 2.231339e-04 0.9999978 3
## 3650 GO:1905039 2.231339e-04 0.9999978 3
## 3655 GO:1905200 2.231339e-04 0.9999978 3
## 1463 GO:0015770 2.392384e-04 0.9999696 7
## 1210 GO:0010200 2.414460e-04 0.9999023 25
## 909 GO:0009639 2.460730e-04 0.9999456 11
## numInCat
## 359 2992
## 3270 254
## 2448 175
## 1737 69
## 1052 162
## 2688 83
## 1047 41
## 918 20
## 3086 30
## 2973 47
## 898 84
## 956 612
## 2908 5
## 3238 4
## 3650 4
## 3655 4
## 1463 30
## 1210 286
## 909 69
## term
## 359 regulation of transcription, DNA-templated
## 3270 response to karrikin
## 2448 pectin catabolic process
## 1737 glucosinolate biosynthetic process
## 1052 regulation of flower development
## 2688 photoperiodism, flowering
## 1047 anther dehiscence
## 918 photoperiodism
## 3086 cellular response to nitrate
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 898 response to insect
## 956 response to auxin
## 2908 cadmium ion homeostasis
## 3238 seed growth
## 3650 carboxylic acid transmembrane transport
## 3655 gibberellic acid transmembrane transport
## 1463 sucrose transport
## 1210 response to chitin
## 909 response to red or far red light
## ontology over_represented_padjust
## 359 BP 1.162500e-09
## 3270 BP 1.191380e-04
## 2448 BP 9.506870e-03
## 1737 BP 9.506870e-03
## 1052 BP 2.424826e-02
## 2688 BP 2.958433e-02
## 1047 BP 3.271632e-02
## 918 BP 3.271632e-02
## 3086 BP 3.271632e-02
## 2973 BP 4.331495e-02
## 898 BP 4.575404e-02
## 956 BP 4.907214e-02
## 2908 BP 4.907214e-02
## 3238 BP 4.907214e-02
## 3650 BP 4.907214e-02
## 3655 BP 4.907214e-02
## 1463 BP 4.907214e-02
## 1210 BP 4.907214e-02
## 909 BP 4.907214e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103 3.687865e-06 0.9999998 7
## 1656 GO:0019252 1.225072e-05 0.9999983 10
## 211 GO:0005983 1.286419e-05 0.9999987 8
## 1098 GO:0010021 2.955039e-05 0.9999989 5
## 416 GO:0006446 4.381791e-05 0.9999968 6
## 1688 GO:0019419 5.249899e-05 0.9999987 4
## 32 GO:0000162 1.003052e-04 0.9999887 7
## 3412 GO:0098869 1.029735e-04 0.9999936 5
## 210 GO:0005982 1.148782e-04 0.9999871 7
## numInCat term ontology
## 26 24 sulfate assimilation BP
## 1656 54 starch biosynthetic process BP
## 211 33 starch catabolic process BP
## 1098 11 amylopectin biosynthetic process BP
## 416 25 regulation of translational initiation BP
## 1688 8 sulfate reduction BP
## 32 35 tryptophan biosynthetic process BP
## 3412 20 cellular oxidant detoxification BP
## 210 30 starch metabolic process BP
## over_represented_padjust
## 26 0.01397332
## 1656 0.01624747
## 211 0.01624747
## 1098 0.02799161
## 416 0.03315311
## 1688 0.03315311
## 32 0.04836372
## 3412 0.04836372
## 210 0.04836372
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 423 GO:0006468 1.044821e-07 1.0000000 53
## 1859 GO:0031349 2.094012e-06 0.9999999 5
## 713 GO:0007169 3.359777e-06 0.9999993 15
## numInCat term
## 423 1484 protein phosphorylation
## 1859 13 positive regulation of defense response
## 713 199 transmembrane receptor protein tyrosine kinase signaling pathway
## ontology over_represented_padjust
## 423 BP 0.0003958828
## 1859 BP 0.0039671056
## 713 BP 0.0042433981
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414 9.142363e-18 1.0000000 55
## 854 GO:0009408 1.210338e-15 1.0000000 36
## 2913 GO:0055085 1.817530e-12 1.0000000 50
## 594 GO:0006833 6.148887e-12 1.0000000 12
## 921 GO:0009651 3.950814e-10 1.0000000 59
## 960 GO:0009737 3.147896e-08 1.0000000 47
## 2088 GO:0034605 1.229675e-07 1.0000000 15
## 952 GO:0009723 6.353837e-07 0.9999998 21
## 359 GO:0006355 9.461339e-07 0.9999996 109
## 2575 GO:0046686 9.557011e-07 0.9999996 41
## 914 GO:0009644 1.020333e-06 0.9999998 13
## 2747 GO:0050821 3.509374e-06 0.9999998 7
## 739 GO:0007623 1.005242e-05 0.9999976 16
## 1434 GO:0015670 1.115771e-05 1.0000000 3
## 1724 GO:0019676 1.376845e-05 0.9999996 5
## 973 GO:0009753 1.725343e-05 0.9999946 22
## 3164 GO:0071732 3.093638e-05 0.9999964 8
## 2106 GO:0034765 3.213091e-05 0.9999970 7
## 646 GO:0006979 3.245504e-05 0.9999878 27
## 419 GO:0006457 3.380041e-05 0.9999882 24
## 3114 GO:0071369 6.554065e-05 0.9999913 8
## 2196 GO:0042026 7.267651e-05 0.9999870 10
## 1210 GO:0010200 7.892983e-05 0.9999757 18
## 962 GO:0009739 1.333565e-04 0.9999645 14
## 3541 GO:1902289 1.361178e-04 0.9999946 4
## 649 GO:0006986 1.436639e-04 0.9999863 6
## 602 GO:0006855 1.456259e-04 0.9999637 13
## 1094 GO:0010017 1.756139e-04 0.9999774 7
## 2452 GO:0045595 1.887136e-04 0.9999969 3
## 2257 GO:0042754 1.988722e-04 0.9999972 3
## 2486 GO:0045892 3.140835e-04 0.9998844 20
## 2091 GO:0034620 3.217688e-04 0.9999635 6
## 2280 GO:0042853 3.548542e-04 0.9999921 3
## 642 GO:0006970 3.739193e-04 0.9998788 16
## 3450 GO:1900150 3.873183e-04 0.9999542 6
## numInCat term ontology
## 859 596 response to water deprivation BP
## 854 305 response to heat BP
## 2913 562 transmembrane transport BP
## 594 34 water transport BP
## 921 1045 response to salt stress BP
## 960 832 response to abscisic acid BP
## 2088 117 cellular response to heat BP
## 952 255 response to ethylene BP
## 359 2992 regulation of transcription, DNA-templated BP
## 2575 753 response to cadmium ion BP
## 914 109 response to high light intensity BP
## 2747 28 protein stabilization BP
## 739 181 circadian rhythm BP
## 1434 3 carbon dioxide transport BP
## 1724 10 ammonia assimilation cycle BP
## 973 338 response to jasmonic acid BP
## 3164 52 cellular response to nitric oxide BP
## 2106 33 regulation of ion transmembrane transport BP
## 646 502 response to oxidative stress BP
## 419 423 protein folding BP
## 3114 62 cellular response to ethylene stimulus BP
## 2196 97 protein refolding BP
## 1210 286 response to chitin BP
## 962 208 response to gibberellin BP
## 3541 13 negative regulation of defense response to oomycetes BP
## 649 31 response to unfolded protein BP
## 602 130 drug transmembrane transport BP
## 1094 52 red or far-red light signaling pathway BP
## 2452 6 regulation of cell differentiation BP
## 2257 5 negative regulation of circadian rhythm BP
## 2486 355 negative regulation of transcription, DNA-templated BP
## 2091 38 cellular response to unfolded protein BP
## 2280 7 L-alanine catabolic process BP
## 642 252 response to osmotic stress BP
## 3450 40 regulation of defense response to fungus BP
## over_represented_padjust
## 859 3.464042e-14
## 854 2.292986e-12
## 2913 2.295540e-09
## 594 5.824533e-09
## 921 2.993927e-07
## 960 1.987896e-05
## 2088 6.656056e-05
## 952 3.009336e-04
## 359 3.514583e-04
## 2575 3.514583e-04
## 914 3.514583e-04
## 2747 1.108085e-03
## 739 2.929894e-03
## 1434 3.019754e-03
## 1724 3.477909e-03
## 973 4.085828e-03
## 3164 6.403487e-03
## 2106 6.403487e-03
## 646 6.403487e-03
## 419 6.403487e-03
## 3114 1.182541e-02
## 2196 1.251688e-02
## 1210 1.300283e-02
## 962 2.043617e-02
## 3541 2.043617e-02
## 649 2.043617e-02
## 602 2.043617e-02
## 1094 2.376433e-02
## 2452 2.465641e-02
## 2257 2.511755e-02
## 2486 3.809943e-02
## 2091 3.809943e-02
## 2280 4.074371e-02
## 642 4.167001e-02
## 3450 4.192998e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 364 GO:0006364 1.422386e-13 1.0000000 24
## 2803 GO:0051301 7.287253e-07 0.9999998 28
## 880 GO:0009561 1.724029e-06 0.9999999 8
## 755 GO:0008295 3.857971e-06 0.9999998 6
## 688 GO:0007088 6.679476e-06 0.9999992 9
## 871 GO:0009451 1.679454e-05 0.9999947 22
## 2471 GO:0045787 2.013055e-05 0.9999974 9
## 663 GO:0007018 2.487619e-05 0.9999949 13
## 307 GO:0006260 2.571845e-05 0.9999955 11
## 495 GO:0006596 3.083674e-05 0.9999978 6
## 473 GO:0006557 6.627647e-05 0.9999978 4
## 102 GO:0000494 9.621584e-05 0.9999986 3
## 3681 GO:1990258 9.621584e-05 0.9999986 3
## 312 GO:0006268 1.011262e-04 0.9999912 6
## 1851 GO:0031167 1.074289e-04 0.9999960 4
## 1540 GO:0016075 1.155552e-04 0.9999955 4
## 314 GO:0006270 1.277248e-04 0.9999880 6
## 496 GO:0006597 1.391222e-04 0.9999943 4
## 146 GO:0001510 1.645776e-04 0.9999928 4
## 112 GO:0000724 1.800148e-04 0.9999680 9
## 69 GO:0000381 1.870324e-04 0.9999805 6
## 50 GO:0000278 1.890979e-04 0.9999543 12
## 22 GO:0000079 1.953523e-04 0.9999692 8
## 1651 GO:0019079 2.150943e-04 0.9999957 3
## 555 GO:0006744 2.516847e-04 0.9999801 5
## 836 GO:0009294 2.671875e-04 0.9999633 7
## 343 GO:0006325 2.853236e-04 0.9999391 10
## 3035 GO:0070828 3.147048e-04 0.9999922 3
## 704 GO:0007142 3.596521e-04 0.9999919 3
## 3503 GO:1901565 3.679428e-04 0.9999986 2
## 2860 GO:0051726 3.802784e-04 0.9999077 11
## numInCat
## 364 160
## 2803 419
## 880 36
## 755 22
## 688 61
## 871 273
## 2471 68
## 663 108
## 307 96
## 495 29
## 473 13
## 102 7
## 3681 7
## 312 27
## 1851 13
## 1540 14
## 314 32
## 496 15
## 146 16
## 112 82
## 69 45
## 50 159
## 22 76
## 1651 8
## 555 27
## 836 51
## 343 120
## 3035 10
## 704 7
## 3503 3
## 2860 151
## term
## 364 rRNA processing
## 2803 cell division
## 880 megagametogenesis
## 755 spermidine biosynthetic process
## 688 regulation of mitotic nuclear division
## 871 RNA modification
## 2471 positive regulation of cell cycle
## 663 microtubule-based movement
## 307 DNA replication
## 495 polyamine biosynthetic process
## 473 S-adenosylmethioninamine biosynthetic process
## 102 box C/D snoRNA 3'-end processing
## 3681 histone glutamine methylation
## 312 DNA unwinding involved in DNA replication
## 1851 rRNA methylation
## 1540 rRNA catabolic process
## 314 DNA replication initiation
## 496 spermine biosynthetic process
## 146 RNA methylation
## 112 double-strand break repair via homologous recombination
## 69 regulation of alternative mRNA splicing, via spliceosome
## 50 mitotic cell cycle
## 22 regulation of cyclin-dependent protein serine/threonine kinase activity
## 1651 viral genome replication
## 555 ubiquinone biosynthetic process
## 836 DNA mediated transformation
## 343 chromatin organization
## 3035 heterochromatin organization
## 704 male meiosis II
## 3503 organonitrogen compound catabolic process
## 2860 regulation of cell cycle
## ontology over_represented_padjust
## 364 BP 5.389421e-10
## 2803 BP 1.380570e-03
## 880 BP 2.177449e-03
## 755 BP 3.654463e-03
## 688 BP 5.061707e-03
## 871 BP 1.060575e-02
## 2471 BP 1.082747e-02
## 663 BP 1.082747e-02
## 307 BP 1.082747e-02
## 495 BP 1.168404e-02
## 473 BP 2.282923e-02
## 102 BP 2.713654e-02
## 3681 BP 2.713654e-02
## 312 BP 2.713654e-02
## 1851 BP 2.713654e-02
## 1540 BP 2.736491e-02
## 314 BP 2.846761e-02
## 496 BP 2.928523e-02
## 146 BP 3.218216e-02
## 112 BP 3.218216e-02
## 69 BP 3.218216e-02
## 50 BP 3.218216e-02
## 22 BP 3.218216e-02
## 1651 BP 3.395802e-02
## 555 BP 3.814533e-02
## 836 BP 3.893744e-02
## 343 BP 4.004042e-02
## 3035 BP 4.258630e-02
## 704 BP 4.647118e-02
## 3503 BP 4.647118e-02
## 2860 BP 4.647983e-02
# using unnest(): expand the per-cluster GO_result list-column into rows and
# write the table to CSV.
# The output path is passed positionally because readr >= 1.4.0 deprecates the
# `path` argument in favour of `file`; the second positional argument works
# with both.
# NOTE(review): the `transcripts` list-column is still present after
# unnest(GO_result) -- confirm it is written to the CSV as intended.
temp %>%
  unnest(GO_result) %>%
  write_csv("../output/diurnal34.time.DEG.Kmeans.6cluster.csv")
# GO enrichment per cluster for the 15 K-means clusters (diurnal34 time DEGs):
# transcript IDs are nested by cluster and each set is handed to
# GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA; results are stored in GO_result.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal34.time.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(cluster.transcripts) {
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster.transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 211 GO:0005983 1.572317e-08 1.0000000 8
## 739 GO:0007623 2.101937e-08 1.0000000 14
## 855 GO:0009409 3.356804e-05 0.9999895 20
## numInCat term ontology over_represented_padjust
## 211 33 starch catabolic process BP 0.0000398212
## 739 181 circadian rhythm BP 0.0000398212
## 855 696 response to cold BP 0.0423964320
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 744 GO:0008150 6.341344e-08 1.0000000 72
## 345 GO:0006334 9.184863e-06 0.9999996 5
## numInCat term ontology over_represented_padjust
## 744 6445 biological_process BP 0.0002402735
## 345 44 nucleosome assembly BP 0.0174007232
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 364 GO:0006364 2.764915e-14 1.0000000 17
## 69 GO:0000381 8.545837e-07 1.0000000 6
## 146 GO:0001510 3.621179e-06 0.9999999 4
## 102 GO:0000494 5.871156e-06 1.0000000 3
## 3681 GO:1990258 5.871156e-06 1.0000000 3
## 2437 GO:0045292 7.760361e-06 0.9999997 5
## 3035 GO:0070828 1.909049e-05 0.9999998 3
## 419 GO:0006457 2.666667e-05 0.9999943 13
## 2455 GO:0045604 2.798915e-05 0.9999998 3
## 2656 GO:0048444 3.055743e-05 0.9999991 4
## 855 GO:0009409 7.790258e-05 0.9999780 16
## 868 GO:0009446 8.884053e-05 0.9999985 3
## 2003 GO:0033388 8.884053e-05 0.9999985 3
## 1742 GO:0019856 9.872194e-05 0.9999984 3
## 1851 GO:0031167 1.226410e-04 0.9999977 3
## 3638 GO:1904812 1.691001e-04 1.0000000 2
## 1367 GO:0010499 1.854115e-04 0.9999906 4
## numInCat term
## 364 160 rRNA processing
## 69 45 regulation of alternative mRNA splicing, via spliceosome
## 146 16 RNA methylation
## 102 7 box C/D snoRNA 3'-end processing
## 3681 7 histone glutamine methylation
## 2437 39 mRNA cis splicing, via spliceosome
## 3035 10 heterochromatin organization
## 419 423 protein folding
## 2455 6 regulation of epidermal cell differentiation
## 2656 19 floral organ morphogenesis
## 855 696 response to cold
## 868 11 putrescine biosynthetic process
## 2003 11 putrescine biosynthetic process from arginine
## 1742 9 pyrimidine nucleobase biosynthetic process
## 1851 13 rRNA methylation
## 3638 2 rRNA acetylation involved in maturation of SSU-rRNA
## 1367 49 proteasomal ubiquitin-independent protein catabolic process
## ontology over_represented_padjust
## 364 BP 1.047626e-10
## 69 BP 1.619009e-03
## 146 BP 4.449162e-03
## 102 BP 4.449162e-03
## 3681 BP 4.449162e-03
## 2437 BP 4.900668e-03
## 3035 BP 1.033341e-02
## 419 BP 1.157821e-02
## 2455 BP 1.157821e-02
## 2656 BP 1.157821e-02
## 855 BP 2.589360e-02
## 868 BP 2.589360e-02
## 2003 BP 2.589360e-02
## 1742 BP 2.671839e-02
## 1851 BP 3.097912e-02
## 3638 BP 4.004501e-02
## 1367 BP 4.132496e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 69 GO:0000381 3.928140e-10 1.0000000 9
## 499 GO:0006606 2.366654e-09 1.0000000 10
## 2437 GO:0045292 6.300805e-08 1.0000000 7
## 762 GO:0008380 5.818397e-06 0.9999992 10
## 76 GO:0000398 1.103801e-05 0.9999984 10
## 398 GO:0006421 7.621456e-05 0.9999989 3
## 75 GO:0000395 8.351978e-05 0.9999987 3
## numInCat term ontology
## 69 45 regulation of alternative mRNA splicing, via spliceosome BP
## 499 67 protein import into nucleus BP
## 2437 39 mRNA cis splicing, via spliceosome BP
## 762 152 RNA splicing BP
## 76 182 mRNA splicing, via spliceosome BP
## 398 8 asparaginyl-tRNA aminoacylation BP
## 75 10 mRNA 5'-splice site recognition BP
## over_represented_padjust
## 69 1.488372e-06
## 499 4.483625e-06
## 2437 7.957916e-05
## 762 5.511477e-03
## 76 8.364602e-03
## 398 4.520806e-02
## 75 4.520806e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 854 GO:0009408 5.824553e-50 1.0000000 37
## 2088 GO:0034605 8.148688e-36 1.0000000 23
## 419 GO:0006457 1.959451e-33 1.0000000 30
## 2091 GO:0034620 1.447957e-21 1.0000000 12
## 649 GO:0006986 3.339549e-20 1.0000000 11
## 914 GO:0009644 7.547734e-20 1.0000000 14
## 2768 GO:0051085 2.403968e-19 1.0000000 13
## 2239 GO:0042542 7.162639e-19 1.0000000 14
## 2196 GO:0042026 1.077118e-16 1.0000000 12
## 2973 GO:0061408 1.067630e-12 1.0000000 8
## 892 GO:0009615 1.129479e-10 1.0000000 8
## 2795 GO:0051259 6.707294e-10 1.0000000 5
## 2775 GO:0051131 3.697003e-09 1.0000000 5
## 2747 GO:0050821 1.416684e-08 1.0000000 5
## 2575 GO:0046686 3.555674e-07 1.0000000 13
## 921 GO:0009651 3.837301e-07 0.9999999 15
## 58 GO:0000302 4.923887e-06 0.9999998 5
## 1712 GO:0019538 7.845640e-06 0.9999998 4
## 3092 GO:0071277 9.984307e-06 0.9999999 3
## 1210 GO:0010200 1.629073e-05 0.9999985 7
## 2967 GO:0061077 6.134692e-05 0.9999990 3
## 2327 GO:0043335 9.135742e-05 0.9999997 2
## numInCat
## 854 305
## 2088 117
## 419 423
## 2091 38
## 649 31
## 914 109
## 2768 82
## 2239 130
## 2196 97
## 2973 47
## 892 82
## 2795 22
## 2775 20
## 2747 28
## 2575 753
## 921 1045
## 58 96
## 1712 39
## 3092 13
## 1210 286
## 2967 30
## 2327 5
## term
## 854 response to heat
## 2088 cellular response to heat
## 419 protein folding
## 2091 cellular response to unfolded protein
## 649 response to unfolded protein
## 914 response to high light intensity
## 2768 chaperone cofactor-dependent protein refolding
## 2239 response to hydrogen peroxide
## 2196 protein refolding
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 892 response to virus
## 2795 protein complex oligomerization
## 2775 chaperone-mediated protein complex assembly
## 2747 protein stabilization
## 2575 response to cadmium ion
## 921 response to salt stress
## 58 response to reactive oxygen species
## 1712 protein metabolic process
## 3092 cellular response to calcium ion
## 1210 response to chitin
## 2967 chaperone-mediated protein folding
## 2327 protein unfolding
## ontology over_represented_padjust
## 854 BP 2.206923e-46
## 2088 BP 1.543769e-32
## 419 BP 2.474787e-30
## 2091 BP 1.371577e-18
## 649 BP 2.530710e-17
## 914 BP 4.766394e-17
## 2768 BP 1.301234e-16
## 2239 BP 3.392405e-16
## 2196 BP 4.534666e-14
## 2973 BP 4.045248e-10
## 892 BP 3.890541e-08
## 2795 BP 2.117828e-07
## 2775 BP 1.077534e-06
## 2747 BP 3.834154e-06
## 2575 BP 8.981632e-05
## 921 BP 9.087208e-05
## 58 BP 1.097447e-03
## 1712 BP 1.651507e-03
## 3092 BP 1.991081e-03
## 1210 BP 3.086278e-03
## 2967 BP 1.106874e-02
## 2327 BP 1.573424e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 663 GO:0007018 5.402148e-16 1.0000000 21
## 636 GO:0006949 3.481836e-10 1.0000000 7
## 688 GO:0007088 5.058197e-09 1.0000000 9
## 2471 GO:0045787 1.795912e-08 1.0000000 9
## 22 GO:0000079 2.801338e-08 1.0000000 9
## 1380 GO:0010583 4.132701e-08 1.0000000 9
## 50 GO:0000278 6.934047e-08 1.0000000 12
## 753 GO:0008284 4.326029e-07 1.0000000 9
## 2860 GO:0051726 2.218223e-06 0.9999997 10
## 312 GO:0006268 3.189839e-06 0.9999999 6
## 1010 GO:0009828 1.100601e-05 0.9999993 6
## 130 GO:0000914 1.206645e-05 0.9999998 4
## 2803 GO:0051301 2.966280e-05 0.9999923 16
## 3277 GO:0080175 7.396195e-05 0.9999993 3
## 930 GO:0009664 1.564350e-04 0.9999837 6
## numInCat
## 663 108
## 636 25
## 688 61
## 2471 68
## 22 76
## 1380 67
## 50 159
## 753 100
## 2860 151
## 312 27
## 1010 72
## 130 10
## 2803 419
## 3277 5
## 930 91
## term
## 663 microtubule-based movement
## 636 syncytium formation
## 688 regulation of mitotic nuclear division
## 2471 positive regulation of cell cycle
## 22 regulation of cyclin-dependent protein serine/threonine kinase activity
## 1380 response to cyclopentenone
## 50 mitotic cell cycle
## 753 positive regulation of cell proliferation
## 2860 regulation of cell cycle
## 312 DNA unwinding involved in DNA replication
## 1010 plant-type cell wall loosening
## 130 phragmoplast assembly
## 2803 cell division
## 3277 phragmoplast microtubule organization
## 930 plant-type cell wall organization
## ontology over_represented_padjust
## 663 BP 2.046874e-12
## 636 BP 6.596338e-07
## 688 BP 6.388502e-06
## 2471 BP 1.701178e-05
## 22 BP 2.122854e-05
## 1380 BP 2.609801e-05
## 50 BP 3.753301e-05
## 753 BP 2.048915e-04
## 2860 BP 9.338717e-04
## 312 BP 1.208630e-03
## 1010 BP 3.791072e-03
## 130 BP 3.809983e-03
## 2803 BP 8.645565e-03
## 3277 BP 2.001727e-02
## 930 BP 3.951549e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414 5.472794e-07 0.9999998 25
## 359 GO:0006355 9.154185e-06 0.9999954 67
## 960 GO:0009737 9.520552e-06 0.9999966 28
## 962 GO:0009739 4.407560e-05 0.9999916 11
## numInCat term ontology
## 859 596 response to water deprivation BP
## 359 2992 regulation of transcription, DNA-templated BP
## 960 832 response to abscisic acid BP
## 962 208 response to gibberellin BP
## over_represented_padjust
## 859 0.002073642
## 359 0.012024458
## 960 0.012024458
## 962 0.041750609
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 359 GO:0006355 1.658801e-07 1.0000000 119
## 2795 GO:0051259 3.503559e-07 1.0000000 7
## 3270 GO:0080167 2.089545e-06 0.9999994 21
## 914 GO:0009644 3.111287e-06 0.9999995 13
## 58 GO:0000302 2.089701e-05 0.9999964 11
## 859 GO:0009414 2.562366e-05 0.9999895 33
## 615 GO:0006883 3.714272e-05 0.9999991 4
## 1270 GO:0010286 3.797205e-05 0.9999931 11
## 3238 GO:0080112 5.769044e-05 0.9999996 3
## 3650 GO:1905039 5.769044e-05 0.9999996 3
## 3655 GO:1905200 5.769044e-05 0.9999996 3
## 918 GO:0009648 9.658606e-05 0.9999941 5
## 1463 GO:0015770 9.767588e-05 0.9999914 6
## 1919 GO:0032268 1.381421e-04 0.9999983 3
## numInCat term ontology
## 359 2992 regulation of transcription, DNA-templated BP
## 2795 22 protein complex oligomerization BP
## 3270 254 response to karrikin BP
## 914 109 response to high light intensity BP
## 58 96 response to reactive oxygen species BP
## 859 596 response to water deprivation BP
## 615 9 cellular sodium ion homeostasis BP
## 1270 99 heat acclimation BP
## 3238 4 seed growth BP
## 3650 4 carboxylic acid transmembrane transport BP
## 3655 4 gibberellic acid transmembrane transport BP
## 918 20 photoperiodism BP
## 1463 30 sucrose transport BP
## 1919 5 regulation of cellular protein metabolic process BP
## over_represented_padjust
## 359 0.0006285196
## 2795 0.0006637492
## 3270 0.0026390947
## 914 0.0029471667
## 58 0.0158357545
## 859 0.0161813391
## 615 0.0179845132
## 1270 0.0179845132
## 3238 0.0198717329
## 3650 0.0198717329
## 3655 0.0198717329
## 918 0.0284687622
## 1463 0.0284687622
## 1919 0.0373871617
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414 1.257030e-11 1.0000000 27
## 594 GO:0006833 5.258354e-10 1.0000000 8
## 2913 GO:0055085 1.273853e-07 1.0000000 23
## 921 GO:0009651 3.712465e-05 0.9999869 24
## numInCat term ontology over_represented_padjust
## 859 596 response to water deprivation BP 4.762889e-08
## 594 34 water transport BP 9.961951e-07
## 2913 562 transmembrane transport BP 1.608876e-04
## 921 1045 response to salt stress BP 3.516633e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2448 GO:0045490 5.987033e-07 0.9999999 13
## numInCat term ontology over_represented_padjust
## 2448 175 pectin catabolic process BP 0.002268487
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103 1.727813e-11 1.0000000 8
## 1342 GO:0010439 1.512440e-07 1.0000000 6
## 32 GO:0000162 8.258252e-07 1.0000000 6
## 1671 GO:0019344 1.563529e-06 0.9999999 6
## 3032 GO:0070814 3.377665e-06 0.9999999 4
## 2917 GO:0055114 8.330312e-06 0.9999966 37
## 1688 GO:0019419 4.109542e-05 0.9999995 3
## 2887 GO:0052542 5.331795e-05 0.9999993 3
## 2443 GO:0045454 5.478737e-05 0.9999914 9
## 2811 GO:0051336 9.850302e-05 0.9999998 2
## 2916 GO:0055091 9.889165e-05 1.0000000 2
## 3006 GO:0070328 9.889165e-05 1.0000000 2
## numInCat term ontology
## 26 24 sulfate assimilation BP
## 1342 28 regulation of glucosinolate biosynthetic process BP
## 32 35 tryptophan biosynthetic process BP
## 1671 41 cysteine biosynthetic process BP
## 3032 13 hydrogen sulfide biosynthetic process BP
## 2917 1923 oxidation-reduction process BP
## 1688 8 sulfate reduction BP
## 2887 9 defense response by callose deposition BP
## 2443 253 cell redox homeostasis BP
## 2811 3 regulation of hydrolase activity BP
## 2916 2 phospholipid homeostasis BP
## 3006 2 triglyceride homeostasis BP
## over_represented_padjust
## 26 6.546684e-08
## 1342 2.865317e-04
## 32 1.043017e-03
## 1671 1.481053e-03
## 3032 2.559595e-03
## 2917 5.260592e-03
## 1688 2.224437e-02
## 2887 2.306548e-02
## 2443 2.306548e-02
## 2811 3.122504e-02
## 2916 3.122504e-02
## 3006 3.122504e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 473 GO:0006557 4.108192e-06 0.9999999 4
## 782 GO:0009073 8.005306e-06 0.9999997 5
## 496 GO:0006597 8.693460e-06 0.9999998 4
## 1651 GO:0019079 2.660380e-05 0.9999997 3
## 862 GO:0009423 3.843819e-05 0.9999989 4
## 755 GO:0008295 5.324627e-05 0.9999982 4
## 2913 GO:0055085 1.019863e-04 0.9999667 19
## 1426 GO:0010966 1.173930e-04 0.9999998 2
## 359 GO:0006355 1.467187e-04 0.9999235 49
## 555 GO:0006744 1.552325e-04 0.9999928 4
## 1473 GO:0015800 1.602357e-04 1.0000000 2
## 3423 GO:0110126 1.602357e-04 1.0000000 2
## 3164 GO:0071732 1.698363e-04 0.9999869 5
## 3094 GO:0071281 1.830375e-04 0.9999804 6
## numInCat term ontology
## 473 13 S-adenosylmethioninamine biosynthetic process BP
## 782 24 aromatic amino acid family biosynthetic process BP
## 496 15 spermine biosynthetic process BP
## 1651 8 viral genome replication BP
## 862 16 chorismate biosynthetic process BP
## 755 22 spermidine biosynthetic process BP
## 2913 562 transmembrane transport BP
## 1426 3 regulation of phosphate transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## 555 27 ubiquinone biosynthetic process BP
## 1473 2 acidic amino acid transport BP
## 3423 2 phloem loading BP
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## over_represented_padjust
## 473 0.01097984
## 782 0.01097984
## 496 0.01097984
## 1651 0.02520045
## 862 0.02912846
## 755 0.03362502
## 2913 0.04950074
## 1426 0.04950074
## 359 0.04950074
## 555 0.04950074
## 1473 0.04950074
## 3423 0.04950074
## 3164 0.04950074
## 3094 0.04953778
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414 2.207438e-07 0.9999999 23
## 960 GO:0009737 2.235441e-06 0.9999993 26
## 972 GO:0009751 4.782115e-06 0.9999990 15
## 952 GO:0009723 2.228710e-05 0.9999957 12
## 1224 GO:0010218 5.247198e-05 0.9999943 7
## 3078 GO:0071215 6.012530e-05 0.9999948 6
## 2106 GO:0034765 7.118921e-05 0.9999956 5
## 860 GO:0009415 8.989023e-05 0.9999984 3
## numInCat term ontology
## 859 596 response to water deprivation BP
## 960 832 response to abscisic acid BP
## 972 347 response to salicylic acid BP
## 952 255 response to ethylene BP
## 1224 91 response to far red light BP
## 3078 62 cellular response to abscisic acid stimulus BP
## 2106 33 regulation of ion transmembrane transport BP
## 860 13 response to water BP
## over_represented_padjust
## 859 0.0008363983
## 960 0.0042350420
## 972 0.0060398107
## 952 0.0211114602
## 1224 0.0379691243
## 3078 0.0379691243
## 2106 0.0385337044
## 860 0.0425742618
# Expand the GO_result list-column of `temp` into ordinary rows with unnest()
# and save the combined table as a CSV file.
write_csv(
  unnest(temp, GO_result),
  path = "../output/diurnal34.time.DEG.Kmeans.15cluster.csv"
)
# K-means, 6 clusters.
# Pair every transcript ID with its cluster label, collect the IDs of each
# cluster into a nested `transcripts` tibble, and hand each cluster's IDs to
# GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(). The per-cluster return values are
# stored in the `GO_result` list-column.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal1314.time.6
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # pull() with no column argument extracts the last (here: only) column
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1588 GO:0016567 1.471241e-10 1.0000000 69
## 634 GO:0006914 7.018011e-09 1.0000000 13
## 594 GO:0006833 2.275995e-08 1.0000000 10
## 359 GO:0006355 7.936683e-06 0.9999951 129
## 1587 GO:0016560 9.946419e-06 0.9999997 5
## 921 GO:0009651 1.614233e-05 0.9999919 57
## 744 GO:0008150 2.829628e-05 0.9999799 220
## 517 GO:0006635 2.903854e-05 0.9999949 11
## 916 GO:0009646 3.561472e-05 0.9999943 10
## 859 GO:0009414 7.037023e-05 0.9999683 36
## 1509 GO:0015914 7.482601e-05 0.9999960 5
## 770 GO:0008652 1.276318e-04 0.9999957 4
## 2745 GO:0050793 1.728096e-04 0.9999977 3
## 1183 GO:0010150 1.802934e-04 0.9999411 18
## numInCat term ontology
## 1588 988 protein ubiquitination BP
## 634 58 autophagy BP
## 594 34 water transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## 1587 11 protein import into peroxisome matrix, docking BP
## 921 1045 response to salt stress BP
## 744 6445 biological_process BP
## 517 76 fatty acid beta-oxidation BP
## 916 67 response to absence of light BP
## 859 596 response to water deprivation BP
## 1509 15 phospholipid transport BP
## 770 9 cellular amino acid biosynthetic process BP
## 2745 5 regulation of developmental process BP
## 1183 219 leaf senescence BP
## over_represented_padjust
## 1588 5.574531e-07
## 634 1.329562e-05
## 594 2.874581e-05
## 359 7.518023e-03
## 1587 7.537396e-03
## 921 1.019388e-02
## 744 1.375338e-02
## 517 1.375338e-02
## 916 1.499380e-02
## 859 2.577416e-02
## 1509 2.577416e-02
## 770 4.029973e-02
## 2745 4.879512e-02
## 1183 4.879512e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 854 GO:0009408 8.109517e-14 1.0000000 52
## 419 GO:0006457 2.219551e-12 1.0000000 60
## 2437 GO:0045292 3.050226e-07 1.0000000 12
## 69 GO:0000381 3.274638e-07 1.0000000 13
## 211 GO:0005983 1.433359e-06 0.9999998 11
## 26 GO:0000103 2.793320e-06 0.9999998 9
## 76 GO:0000398 3.051956e-05 0.9999896 24
## 762 GO:0008380 6.536873e-05 0.9999780 22
## 2811 GO:0051336 7.685834e-05 1.0000000 3
## 2239 GO:0042542 1.023995e-04 0.9999687 18
## 32 GO:0000162 1.320460e-04 0.9999794 9
## 2768 GO:0051085 1.382397e-04 0.9999647 14
## 1628 GO:0018131 1.917710e-04 1.0000000 3
## 739 GO:0007623 1.928278e-04 0.9999276 23
## 2575 GO:0046686 1.975734e-04 0.9998852 65
## 855 GO:0009409 2.194944e-04 0.9998755 59
## 2088 GO:0034605 2.228418e-04 0.9999304 17
## 562 GO:0006760 2.364390e-04 0.9999976 3
## numInCat term ontology
## 854 305 response to heat BP
## 419 423 protein folding BP
## 2437 39 mRNA cis splicing, via spliceosome BP
## 69 45 regulation of alternative mRNA splicing, via spliceosome BP
## 211 33 starch catabolic process BP
## 26 24 sulfate assimilation BP
## 76 182 mRNA splicing, via spliceosome BP
## 762 152 RNA splicing BP
## 2811 3 regulation of hydrolase activity BP
## 2239 130 response to hydrogen peroxide BP
## 32 35 tryptophan biosynthetic process BP
## 2768 82 chaperone cofactor-dependent protein refolding BP
## 1628 3 oxazole or thiazole biosynthetic process BP
## 739 181 circadian rhythm BP
## 2575 753 response to cadmium ion BP
## 855 696 response to cold BP
## 2088 117 cellular response to heat BP
## 562 4 folic acid-containing compound metabolic process BP
## over_represented_padjust
## 854 3.072696e-10
## 419 4.204939e-09
## 2437 3.101901e-04
## 69 3.101901e-04
## 211 1.086200e-03
## 26 1.763981e-03
## 76 1.651980e-02
## 762 3.096027e-02
## 2811 3.235736e-02
## 2239 3.879916e-02
## 32 4.364918e-02
## 2768 4.364918e-02
## 1628 4.966751e-02
## 739 4.966751e-02
## 2575 4.966751e-02
## 855 4.966751e-02
## 2088 4.966751e-02
## 562 4.977041e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 364 GO:0006364 2.393196e-34 1.0000000 50
## 990 GO:0009793 9.754393e-11 1.0000000 65
## 254 GO:0006096 1.566697e-08 1.0000000 21
## 2196 GO:0042026 2.872373e-08 1.0000000 17
## 86 GO:0000462 1.675822e-07 1.0000000 11
## 872 GO:0009553 1.909858e-07 1.0000000 22
## 1851 GO:0031167 1.125246e-06 1.0000000 6
## 419 GO:0006457 1.200376e-06 0.9999996 33
## 880 GO:0009561 5.279449e-06 0.9999995 9
## 2211 GO:0042254 1.210502e-05 0.9999971 16
## 855 GO:0009409 2.111775e-05 0.9999905 41
## 1139 GO:0010074 5.263969e-05 0.9999990 4
## 1416 GO:0010922 5.747121e-05 1.0000000 3
## 2220 GO:0042326 5.747121e-05 1.0000000 3
## 2214 GO:0042273 6.080392e-05 0.9999950 6
## 518 GO:0006636 7.199516e-05 0.9999905 8
## 391 GO:0006413 7.772936e-05 0.9999773 17
## 390 GO:0006412 1.156609e-04 0.9999520 28
## 688 GO:0007088 1.312060e-04 0.9999781 9
## 1746 GO:0019919 1.445912e-04 0.9999907 5
## 8 GO:0000027 1.473313e-04 0.9999778 8
## 752 GO:0008283 1.595805e-04 0.9999763 8
## 9 GO:0000028 1.669040e-04 0.9999785 7
## 836 GO:0009294 1.858305e-04 0.9999680 9
## 1646 GO:0018377 1.875561e-04 1.0000000 3
## 102 GO:0000494 2.623940e-04 0.9999947 3
## 3681 GO:1990258 2.623940e-04 0.9999947 3
## 1610 GO:0017126 2.643548e-04 1.0000000 2
## 87 GO:0000463 2.973289e-04 0.9999761 5
## 3032 GO:0070814 3.926556e-04 0.9999795 4
## numInCat
## 364 160
## 990 767
## 254 138
## 2196 97
## 86 49
## 872 144
## 1851 13
## 419 423
## 880 36
## 2211 236
## 855 696
## 1139 6
## 1416 3
## 2220 3
## 2214 32
## 518 49
## 391 182
## 390 715
## 688 61
## 1746 17
## 8 72
## 752 51
## 9 61
## 836 51
## 1646 3
## 102 7
## 3681 7
## 1610 2
## 87 23
## 3032 13
## term
## 364 rRNA processing
## 990 embryo development ending in seed dormancy
## 254 glycolytic process
## 2196 protein refolding
## 86 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 872 embryo sac development
## 1851 rRNA methylation
## 419 protein folding
## 880 megagametogenesis
## 2211 ribosome biogenesis
## 855 response to cold
## 1139 maintenance of meristem identity
## 1416 positive regulation of phosphatase activity
## 2220 negative regulation of phosphorylation
## 2214 ribosomal large subunit biogenesis
## 518 unsaturated fatty acid biosynthetic process
## 391 translational initiation
## 390 translation
## 688 regulation of mitotic nuclear division
## 1746 peptidyl-arginine methylation, to asymmetrical-dimethyl arginine
## 8 ribosomal large subunit assembly
## 752 cell proliferation
## 9 ribosomal small subunit assembly
## 836 DNA mediated transformation
## 1646 protein myristoylation
## 102 box C/D snoRNA 3'-end processing
## 3681 histone glutamine methylation
## 1610 nucleologenesis
## 87 maturation of LSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 3032 hydrogen sulfide biosynthetic process
## ontology over_represented_padjust
## 364 BP 9.067820e-31
## 990 BP 1.847970e-07
## 254 BP 1.978738e-05
## 2196 BP 2.720855e-05
## 86 BP 1.206075e-04
## 872 BP 1.206075e-04
## 1851 BP 5.685282e-04
## 419 BP 5.685282e-04
## 880 BP 2.222648e-03
## 2211 BP 4.586591e-03
## 855 BP 7.274105e-03
## 1139 BP 1.535907e-02
## 1416 BP 1.535907e-02
## 2220 BP 1.535907e-02
## 2214 BP 1.535907e-02
## 518 BP 1.704935e-02
## 391 BP 1.732450e-02
## 390 BP 2.434662e-02
## 688 BP 2.616524e-02
## 1746 BP 2.658278e-02
## 8 BP 2.658278e-02
## 752 BP 2.748412e-02
## 9 BP 2.749562e-02
## 836 BP 2.842600e-02
## 1646 BP 2.842600e-02
## 102 BP 3.577287e-02
## 3681 BP 3.577287e-02
## 1610 BP 3.577287e-02
## 87 BP 3.884756e-02
## 3032 BP 4.959241e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 620 GO:0006888 1.556063e-10 1.0000000 26
## 2917 GO:0055114 1.904872e-10 1.0000000 134
## 618 GO:0006886 2.498335e-09 1.0000000 42
## 1291 GO:0010345 1.341486e-08 1.0000000 13
## 960 GO:0009737 3.064785e-07 1.0000000 62
## 1564 GO:0016192 3.254301e-07 0.9999999 26
## 2634 GO:0048280 1.789761e-06 0.9999999 7
## 1933 GO:0032482 1.531185e-05 0.9999970 13
## 1008 GO:0009826 1.560475e-05 0.9999946 26
## 1523 GO:0015991 1.684641e-05 0.9999970 12
## 675 GO:0007035 2.095010e-05 0.9999983 7
## 278 GO:0006152 2.128646e-05 0.9999997 4
## 859 GO:0009414 2.477618e-05 0.9999885 44
## 2716 GO:0048767 3.373106e-05 0.9999924 14
## 660 GO:0007010 3.578244e-05 0.9999937 11
## 662 GO:0007017 5.510316e-05 0.9999907 10
## 2241 GO:0042546 6.865808e-05 0.9999844 13
## 2478 GO:0045839 8.048514e-05 0.9999950 5
## 41 GO:0000226 9.013085e-05 0.9999789 13
## 921 GO:0009651 9.382861e-05 0.9999469 65
## 2201 GO:0042147 1.111789e-04 0.9999845 8
## 995 GO:0009807 1.151905e-04 0.9999896 6
## 515 GO:0006631 1.720986e-04 0.9999601 12
## 3473 GO:1901001 1.754371e-04 0.9999833 6
## 2470 GO:0045786 1.967869e-04 0.9999850 5
## 1327 GO:0010411 2.258519e-04 0.9999501 11
## 517 GO:0006635 2.303113e-04 0.9999491 11
## 2114 GO:0034976 2.949048e-04 0.9999454 9
## numInCat term ontology
## 620 166 ER to Golgi vesicle-mediated transport BP
## 2917 1923 oxidation-reduction process BP
## 618 428 intracellular protein transport BP
## 1291 41 suberin biosynthetic process BP
## 960 832 response to abscisic acid BP
## 1564 244 vesicle-mediated transport BP
## 2634 20 vesicle fusion with Golgi apparatus BP
## 1933 109 Rab protein signal transduction BP
## 1008 256 unidimensional cell growth BP
## 1523 81 ATP hydrolysis coupled proton transport BP
## 675 23 vacuolar acidification BP
## 278 6 purine nucleoside catabolic process BP
## 859 596 response to water deprivation BP
## 2716 99 root hair elongation BP
## 660 67 cytoskeleton organization BP
## 662 65 microtubule-based process BP
## 2241 95 cell wall biogenesis BP
## 2478 27 negative regulation of mitotic nuclear division BP
## 41 98 microtubule cytoskeleton organization BP
## 921 1045 response to salt stress BP
## 2201 45 retrograde transport, endosome to Golgi BP
## 995 28 lignan biosynthetic process BP
## 515 85 fatty acid metabolic process BP
## 3473 24 negative regulation of response to salt stress BP
## 2470 30 negative regulation of cell cycle BP
## 1327 77 xyloglucan metabolic process BP
## 517 76 fatty acid beta-oxidation BP
## 2114 55 response to endoplasmic reticulum stress BP
## over_represented_padjust
## 620 3.608781e-07
## 2917 3.608781e-07
## 618 3.155397e-06
## 1291 1.270723e-05
## 960 2.055091e-04
## 1564 2.055091e-04
## 2634 9.687718e-04
## 1933 6.383104e-03
## 1008 6.383104e-03
## 1523 6.383104e-03
## 675 6.721200e-03
## 278 6.721200e-03
## 859 7.221303e-03
## 2716 9.038644e-03
## 660 9.038644e-03
## 662 1.304912e-02
## 2241 1.530268e-02
## 2478 1.694212e-02
## 41 1.777583e-02
## 921 1.777583e-02
## 2201 1.983894e-02
## 995 1.983894e-02
## 515 2.769713e-02
## 3473 2.769713e-02
## 2470 2.982503e-02
## 1327 3.232036e-02
## 517 3.232036e-02
## 2114 3.990694e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 359 GO:0006355 3.484240e-16 1.0000000 191
## 957 GO:0009734 1.013408e-08 1.0000000 40
## 859 GO:0009414 2.487774e-07 1.0000000 51
## 862 GO:0009423 2.699999e-07 1.0000000 8
## 956 GO:0009733 3.197026e-07 0.9999999 47
## 782 GO:0009073 6.406436e-07 1.0000000 9
## 2486 GO:0045892 5.542665e-06 0.9999979 33
## 2712 GO:0048756 1.071621e-05 0.9999999 4
## 739 GO:0007623 1.173405e-05 0.9999966 21
## 960 GO:0009737 1.315675e-05 0.9999935 59
## 1380 GO:0010583 2.770717e-05 0.9999948 12
## 2575 GO:0046686 3.146692e-05 0.9999840 54
## 2586 GO:0046827 4.819892e-05 1.0000000 3
## 153 GO:0001666 6.228472e-05 0.9999861 13
## 1278 GO:0010315 8.760525e-05 0.9999871 9
## 2578 GO:0046719 1.186981e-04 0.9999991 3
## 472 GO:0006556 1.295855e-04 0.9999960 4
## 646 GO:0006979 1.321049e-04 0.9999392 35
## 2646 GO:0048364 1.426994e-04 0.9999364 33
## 3270 GO:0080167 1.467287e-04 0.9999466 22
## 992 GO:0009800 1.501745e-04 0.9999922 5
## 946 GO:0009699 1.798635e-04 0.9999741 8
## 1737 GO:0019761 2.377272e-04 0.9999519 10
## 908 GO:0009638 2.477560e-04 0.9999574 9
## 921 GO:0009651 2.871091e-04 0.9998322 62
## 1183 GO:0010150 2.934480e-04 0.9998909 21
## 2689 GO:0048574 2.946741e-04 0.9999612 7
## numInCat term ontology
## 359 2992 regulation of transcription, DNA-templated BP
## 957 377 auxin-activated signaling pathway BP
## 859 596 response to water deprivation BP
## 862 16 chorismate biosynthetic process BP
## 956 612 response to auxin BP
## 782 24 aromatic amino acid family biosynthetic process BP
## 2486 355 negative regulation of transcription, DNA-templated BP
## 2712 5 sieve cell differentiation BP
## 739 181 circadian rhythm BP
## 960 832 response to abscisic acid BP
## 1380 67 response to cyclopentenone BP
## 2575 753 response to cadmium ion BP
## 2586 3 positive regulation of protein export from nucleus BP
## 153 83 response to hypoxia BP
## 1278 34 auxin efflux BP
## 2578 4 regulation by virus of viral protein levels in host cell BP
## 472 8 S-adenosylmethionine biosynthetic process BP
## 646 502 response to oxidative stress BP
## 2646 326 root development BP
## 3270 254 response to karrikin BP
## 992 10 cinnamic acid biosynthetic process BP
## 946 34 phenylpropanoid biosynthetic process BP
## 1737 69 glucosinolate biosynthetic process BP
## 908 37 phototropism BP
## 921 1045 response to salt stress BP
## 1183 219 leaf senescence BP
## 2689 30 long-day photoperiodism, flowering BP
## over_represented_padjust
## 359 1.320178e-12
## 957 1.919902e-05
## 859 2.422706e-04
## 862 2.422706e-04
## 956 2.422706e-04
## 782 4.045664e-04
## 2486 3.000165e-03
## 2712 4.940037e-03
## 739 4.940037e-03
## 960 4.985094e-03
## 1380 9.543861e-03
## 2575 9.935681e-03
## 2586 1.404813e-02
## 153 1.685692e-02
## 1278 2.212909e-02
## 2578 2.709577e-02
## 472 2.709577e-02
## 646 2.709577e-02
## 2646 2.709577e-02
## 3270 2.709577e-02
## 992 2.709577e-02
## 946 3.097739e-02
## 1737 3.911448e-02
## 908 3.911448e-02
## 921 4.135259e-02
## 1183 4.135259e-02
## 2689 4.135259e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 359 GO:0006355 3.468321e-11 1.0000000 327
## 1210 GO:0010200 3.675982e-09 1.0000000 51
## 2795 GO:0051259 7.459472e-08 1.0000000 10
## 914 GO:0009644 1.306012e-07 1.0000000 26
## 2239 GO:0042542 1.748421e-07 1.0000000 28
## 1270 GO:0010286 4.168907e-07 0.9999999 24
## 1183 GO:0010150 9.219324e-07 0.9999997 40
## 646 GO:0006979 2.174960e-06 0.9999991 68
## 58 GO:0000302 2.502004e-06 0.9999994 21
## 2917 GO:0055114 7.200448e-06 0.9999952 208
## 744 GO:0008150 1.202850e-05 0.9999905 539
## 910 GO:0009640 7.333506e-05 0.9999737 25
## 3270 GO:0080167 1.128228e-04 0.9999484 37
## 1404 GO:0010729 1.168180e-04 0.9999983 4
## 3164 GO:0071732 1.652231e-04 0.9999614 13
## numInCat term
## 359 2992 regulation of transcription, DNA-templated
## 1210 286 response to chitin
## 2795 22 protein complex oligomerization
## 914 109 response to high light intensity
## 2239 130 response to hydrogen peroxide
## 1270 99 heat acclimation
## 1183 219 leaf senescence
## 646 502 response to oxidative stress
## 58 96 response to reactive oxygen species
## 2917 1923 oxidation-reduction process
## 744 6445 biological_process
## 910 129 photomorphogenesis
## 3270 254 response to karrikin
## 1404 5 positive regulation of hydrogen peroxide biosynthetic process
## 3164 52 cellular response to nitric oxide
## ontology over_represented_padjust
## 359 BP 1.314147e-07
## 1210 BP 6.964148e-06
## 2795 BP 9.421314e-05
## 914 BP 1.237120e-04
## 2239 BP 1.324953e-04
## 1270 BP 2.632665e-04
## 1183 BP 4.990288e-04
## 646 BP 1.030116e-03
## 58 BP 1.053344e-03
## 2917 BP 2.728250e-03
## 744 BP 4.143271e-03
## 910 BP 2.315555e-02
## 3270 BP 3.161595e-02
## 1404 BP 3.161595e-02
## 3164 BP 4.173537e-02
# Expand the GO_result list-column of `temp` into ordinary rows with unnest()
# and save the combined table as a CSV file.
# The file is labelled "6cluster" to match the k = 6 clustering
# (kClusters.diurnal1314.time.6) that `temp` was built from. It was previously
# written as "diurnal1314.time.DEG.Kmeans.5cluster.csv"; anything that reads
# the old file name must be pointed at the new one.
# NOTE(review): the nested `transcripts` list-column is still present after
# unnest() -- confirm how write_csv() serialises it in the readr version in use.
temp %>%
  unnest(GO_result) %>%
  write_csv(path = "../output/diurnal1314.time.DEG.Kmeans.6cluster.csv")
# K-means, 15 clusters.
# Pair every transcript ID with its cluster label, collect the IDs of each
# cluster into a nested `transcripts` tibble, and hand each cluster's IDs to
# GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(). The per-cluster return values are
# stored in the `GO_result` list-column.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal1314.time.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # pull() with no column argument extracts the last (here: only) column
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1588 GO:0016567 3.849482e-10 1.0000000 45
## 634 GO:0006914 1.903491e-07 1.0000000 9
## 916 GO:0009646 1.217681e-05 0.9999987 8
## 470 GO:0006552 2.752624e-05 0.9999994 4
## 1432 GO:0015031 2.871240e-05 0.9999915 19
## 517 GO:0006635 4.185058e-05 0.9999947 8
## 1509 GO:0015914 9.164722e-05 0.9999966 4
## 13 GO:0000045 9.901920e-05 0.9999908 6
## numInCat term ontology over_represented_padjust
## 1588 988 protein ubiquitination BP 1.458569e-06
## 634 58 autophagy BP 3.606163e-04
## 916 67 response to absence of light BP 1.537931e-02
## 470 10 leucine catabolic process BP 2.175826e-02
## 1432 423 protein transport BP 2.175826e-02
## 517 76 fatty acid beta-oxidation BP 2.642864e-02
## 1509 15 phospholipid transport BP 4.689797e-02
## 13 45 autophagosome assembly BP 4.689797e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 744 GO:0008150 1.397610e-09 1 100
## 2614 GO:0048096 2.330488e-05 1 2
## numInCat term ontology
## 744 6445 biological_process BP
## 2614 2 chromatin-mediated maintenance of transcription BP
## over_represented_padjust
## 744 5.295543e-06
## 2614 4.415110e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103 2.273003e-08 1.0000000 8
## 3032 GO:0070814 1.252714e-07 1.0000000 6
## 1002 GO:0009817 1.682569e-07 1.0000000 13
## 2917 GO:0055114 6.622543e-07 0.9999998 77
## 1166 GO:0010112 6.403056e-06 0.9999997 6
## 1708 GO:0019509 2.483389e-05 0.9999994 4
## 899 GO:0009626 4.362989e-05 0.9999904 13
## 2913 GO:0055085 5.613172e-05 0.9999760 33
## 1694 GO:0019438 5.818906e-05 0.9999953 6
## 1783 GO:0030187 8.338058e-05 0.9999971 4
## numInCat term ontology
## 26 24 sulfate assimilation BP
## 3032 13 hydrogen sulfide biosynthetic process BP
## 1002 100 defense response to fungus, incompatible interaction BP
## 2917 1923 oxidation-reduction process BP
## 1166 22 regulation of systemic acquired resistance BP
## 1708 10 L-methionine salvage from methylthioadenosine BP
## 899 140 plant-type hypersensitive response BP
## 2913 562 transmembrane transport BP
## 1694 33 aromatic compound biosynthetic process BP
## 1783 13 melatonin biosynthetic process BP
## over_represented_padjust
## 26 0.0000861241
## 3032 0.0002125084
## 1002 0.0002125084
## 2917 0.0006273204
## 1166 0.0048522357
## 1708 0.0156826022
## 899 0.0236162341
## 2913 0.0244975939
## 1694 0.0244975939
## 1783 0.0315929024
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 960 GO:0009737 9.489755e-15 1.0000000 36
## 859 GO:0009414 3.108692e-14 1.0000000 30
## 860 GO:0009415 6.971840e-09 1.0000000 5
## 2913 GO:0055085 1.199155e-08 1.0000000 24
## 921 GO:0009651 3.522399e-07 0.9999999 28
## 904 GO:0009631 1.357395e-06 0.9999999 8
## 961 GO:0009738 3.664216e-06 0.9999992 16
## 642 GO:0006970 3.783913e-06 0.9999994 12
## 517 GO:0006635 8.401934e-06 0.9999993 7
## 834 GO:0009269 1.161893e-05 0.9999995 5
## 3403 GO:0098712 3.421071e-05 0.9999997 3
## 3031 GO:0070813 6.638609e-05 1.0000000 2
## numInCat term ontology
## 960 832 response to abscisic acid BP
## 859 596 response to water deprivation BP
## 860 13 response to water BP
## 2913 562 transmembrane transport BP
## 921 1045 response to salt stress BP
## 904 104 cold acclimation BP
## 961 437 abscisic acid-activated signaling pathway BP
## 642 252 response to osmotic stress BP
## 517 76 fatty acid beta-oxidation BP
## 834 37 response to desiccation BP
## 3403 7 L-glutamate import across plasma membrane BP
## 3031 2 hydrogen sulfide metabolic process BP
## over_represented_padjust
## 960 3.595668e-11
## 859 5.889416e-11
## 860 8.805434e-06
## 2913 1.135899e-05
## 921 2.669274e-04
## 904 8.571946e-04
## 961 1.792156e-03
## 642 1.792156e-03
## 517 3.537214e-03
## 834 4.402412e-03
## 3403 1.178403e-02
## 3031 2.096141e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2437 GO:0045292 8.623797e-09 1.0000000 10
## 419 GO:0006457 1.109566e-08 1.0000000 31
## 69 GO:0000381 4.779232e-08 1.0000000 10
## 854 GO:0009408 6.337182e-07 0.9999998 23
## 211 GO:0005983 1.146102e-05 0.9999991 7
## 499 GO:0006606 3.151611e-05 0.9999956 9
## 1860 GO:0031365 3.196444e-05 0.9999993 4
## 2575 GO:0046686 4.006781e-05 0.9999824 36
## 75 GO:0000395 4.119843e-05 0.9999989 4
## 76 GO:0000398 6.765334e-05 0.9999832 14
## 762 GO:0008380 8.408582e-05 0.9999801 13
## numInCat term ontology
## 2437 39 mRNA cis splicing, via spliceosome BP
## 419 423 protein folding BP
## 69 45 regulation of alternative mRNA splicing, via spliceosome BP
## 854 305 response to heat BP
## 211 33 starch catabolic process BP
## 499 67 protein import into nucleus BP
## 1860 9 N-terminal protein amino acid modification BP
## 2575 753 response to cadmium ion BP
## 75 10 mRNA 5'-splice site recognition BP
## 76 182 mRNA splicing, via spliceosome BP
## 762 152 RNA splicing BP
## over_represented_padjust
## 2437 2.102073e-05
## 419 2.102073e-05
## 69 6.036170e-05
## 854 6.002896e-04
## 211 8.685159e-03
## 499 1.730189e-02
## 1860 1.730189e-02
## 2575 1.734454e-02
## 75 1.734454e-02
## 76 2.563385e-02
## 762 2.896374e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 662 GO:0007017 3.354846e-08 1.0000000 11
## 41 GO:0000226 1.098832e-07 1.0000000 13
## 620 GO:0006888 1.670693e-07 1.0000000 16
## 618 GO:0006886 4.872304e-06 0.9999985 24
## 2478 GO:0045839 2.238550e-05 0.9999989 5
## 239 GO:0006048 2.927616e-05 0.9999993 4
## 50 GO:0000278 3.234985e-05 0.9999930 13
## 2470 GO:0045786 4.572597e-05 0.9999975 5
## numInCat term ontology
## 662 65 microtubule-based process BP
## 41 98 microtubule cytoskeleton organization BP
## 620 166 ER to Golgi vesicle-mediated transport BP
## 618 428 intracellular protein transport BP
## 2478 27 negative regulation of mitotic nuclear division BP
## 239 9 UDP-N-acetylglucosamine biosynthetic process BP
## 50 159 mitotic cell cycle BP
## 2470 30 negative regulation of cell cycle BP
## over_represented_padjust
## 662 0.0001271151
## 41 0.0002081737
## 620 0.0002110086
## 618 0.0046152903
## 2478 0.0169637347
## 239 0.0175105132
## 50 0.0175105132
## 2470 0.0216569634
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2917 GO:0055114 5.631648e-09 1.0000000 65
## 1126 GO:0010054 7.040200e-08 1.0000000 5
## 997 GO:0009809 3.198873e-06 0.9999996 11
## 676 GO:0007043 3.522516e-06 0.9999999 4
## 3200 GO:0072732 7.884618e-06 1.0000000 3
## 994 GO:0009805 1.800113e-05 0.9999996 4
## 2716 GO:0048767 3.284954e-05 0.9999953 9
## 957 GO:0009734 4.335307e-05 0.9999878 17
## 1663 GO:0019287 6.369490e-05 0.9999979 4
## 1125 GO:0010053 8.531885e-05 0.9999924 6
## 1008 GO:0009826 9.928023e-05 0.9999742 14
## 3110 GO:0071365 1.395094e-04 0.9999900 5
## numInCat term
## 2917 1923 oxidation-reduction process
## 1126 8 trichoblast differentiation
## 997 116 lignin biosynthetic process
## 676 12 cell-cell junction assembly
## 3200 4 cellular response to calcium ion starvation
## 994 11 coumarin biosynthetic process
## 2716 99 root hair elongation
## 957 377 auxin-activated signaling pathway
## 1663 13 isopentenyl diphosphate biosynthetic process, mevalonate pathway
## 1125 42 root epidermal cell differentiation
## 1008 256 unidimensional cell growth
## 3110 36 cellular response to auxin stimulus
## ontology over_represented_padjust
## 2917 BP 2.133832e-05
## 1126 BP 1.333766e-04
## 997 BP 3.336703e-03
## 676 BP 3.336703e-03
## 3200 BP 5.974964e-03
## 994 BP 1.136772e-02
## 2716 BP 1.778099e-02
## 957 BP 2.053310e-02
## 1663 BP 2.681555e-02
## 1125 BP 3.232731e-02
## 1008 BP 3.419753e-02
## 3110 BP 4.405009e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 364 GO:0006364 4.792368e-47 1.0000000 49
## 990 GO:0009793 4.674821e-14 1.0000000 49
## 419 GO:0006457 1.414484e-10 1.0000000 28
## 86 GO:0000462 1.757274e-10 1.0000000 11
## 2196 GO:0042026 2.344270e-10 1.0000000 15
## 2211 GO:0042254 1.795168e-08 1.0000000 14
## 872 GO:0009553 4.048503e-08 1.0000000 17
## 390 GO:0006412 9.616713e-08 1.0000000 22
## 8 GO:0000027 4.425042e-07 1.0000000 8
## 2214 GO:0042273 6.806677e-07 1.0000000 6
## 1851 GO:0031167 9.426005e-07 1.0000000 5
## 378 GO:0006396 2.453011e-06 0.9999997 11
## 1746 GO:0019919 4.730782e-06 0.9999999 5
## 880 GO:0009561 6.296014e-06 0.9999995 7
## 391 GO:0006413 1.059482e-05 0.9999979 13
## 87 GO:0000463 1.134493e-05 0.9999995 5
## 9 GO:0000028 1.505072e-05 0.9999990 6
## 102 GO:0000494 2.136054e-05 0.9999998 3
## 3681 GO:1990258 2.136054e-05 0.9999998 3
## 2109 GO:0034969 2.237774e-05 0.9999990 5
## 1610 GO:0017126 4.358185e-05 1.0000000 2
## 146 GO:0001510 5.474898e-05 0.9999982 4
## 289 GO:0006189 6.481222e-05 0.9999963 5
## 1144 GO:0010080 6.551240e-05 0.9999994 3
## 3059 GO:0071028 7.511706e-05 0.9999989 3
## 1192 GO:0010162 7.827587e-05 0.9999952 5
## 2768 GO:0051085 8.666422e-05 0.9999878 8
## 988 GO:0009790 1.180523e-04 0.9999729 12
## 2075 GO:0034475 1.211075e-04 0.9999978 3
## 658 GO:0007005 1.283551e-04 0.9999810 8
## 82 GO:0000454 1.509351e-04 0.9999996 2
## 752 GO:0008283 1.612840e-04 0.9999835 6
## 2575 GO:0046686 1.739260e-04 0.9999277 27
## 2416 GO:0045037 2.497229e-04 0.9999729 6
## 596 GO:0006839 2.547212e-04 0.9999712 6
## 3261 GO:0080156 2.780841e-04 0.9999685 6
## 1583 GO:0016554 2.950233e-04 0.9999749 5
## 1139 GO:0010074 3.049828e-04 0.9999941 3
## 2067 GO:0034427 3.106374e-04 0.9999921 3
## 2455 GO:0045604 3.423867e-04 0.9999932 3
## 455 GO:0006527 3.610280e-04 0.9999919 3
## 77 GO:0000413 5.254751e-04 0.9999477 5
## 3062 GO:0071035 5.482813e-04 0.9999841 3
## numInCat
## 364 160
## 990 767
## 419 423
## 86 49
## 2196 97
## 2211 236
## 872 144
## 390 715
## 8 72
## 2214 32
## 1851 13
## 378 103
## 1746 17
## 880 36
## 391 182
## 87 23
## 9 61
## 102 7
## 3681 7
## 2109 20
## 1610 2
## 146 16
## 289 21
## 1144 5
## 3059 8
## 1192 28
## 2768 82
## 988 176
## 2075 10
## 658 78
## 82 3
## 752 51
## 2575 753
## 2416 39
## 596 65
## 3261 52
## 1583 39
## 1139 6
## 2067 11
## 2455 6
## 455 7
## 77 59
## 3062 9
## term
## 364 rRNA processing
## 990 embryo development ending in seed dormancy
## 419 protein folding
## 86 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 2196 protein refolding
## 2211 ribosome biogenesis
## 872 embryo sac development
## 390 translation
## 8 ribosomal large subunit assembly
## 2214 ribosomal large subunit biogenesis
## 1851 rRNA methylation
## 378 RNA processing
## 1746 peptidyl-arginine methylation, to asymmetrical-dimethyl arginine
## 880 megagametogenesis
## 391 translational initiation
## 87 maturation of LSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 9 ribosomal small subunit assembly
## 102 box C/D snoRNA 3'-end processing
## 3681 histone glutamine methylation
## 2109 histone arginine methylation
## 1610 nucleologenesis
## 146 RNA methylation
## 289 'de novo' IMP biosynthetic process
## 1144 regulation of floral meristem growth
## 3059 nuclear mRNA surveillance
## 1192 seed dormancy process
## 2768 chaperone cofactor-dependent protein refolding
## 988 embryo development
## 2075 U4 snRNA 3'-end processing
## 658 mitochondrion organization
## 82 snoRNA guided rRNA pseudouridine synthesis
## 752 cell proliferation
## 2575 response to cadmium ion
## 2416 protein import into chloroplast stroma
## 596 mitochondrial transport
## 3261 mitochondrial mRNA modification
## 1583 cytidine to uridine editing
## 1139 maintenance of meristem identity
## 2067 nuclear-transcribed mRNA catabolic process, exonucleolytic, 3'-5'
## 2455 regulation of epidermal cell differentiation
## 455 arginine catabolic process
## 77 protein peptidyl-prolyl isomerization
## 3062 nuclear polyadenylation-dependent rRNA catabolic process
## ontology over_represented_padjust
## 364 BP 1.815828e-43
## 990 BP 8.856448e-11
## 419 BP 1.664578e-07
## 86 BP 1.664578e-07
## 2196 BP 1.776488e-07
## 2211 BP 1.133649e-05
## 872 BP 2.191397e-05
## 390 BP 4.554716e-05
## 8 BP 1.862943e-04
## 2214 BP 2.579050e-04
## 1851 BP 3.246830e-04
## 378 BP 7.745382e-04
## 1746 BP 1.378841e-03
## 880 BP 1.703971e-03
## 391 BP 2.676251e-03
## 87 BP 2.686622e-03
## 9 BP 3.354540e-03
## 102 BP 4.239462e-03
## 3681 BP 4.239462e-03
## 2109 BP 4.239462e-03
## 1610 BP 7.863411e-03
## 146 BP 9.429268e-03
## 289 BP 1.034277e-02
## 1144 BP 1.034277e-02
## 3059 BP 1.138474e-02
## 1192 BP 1.140720e-02
## 2768 BP 1.216188e-02
## 988 BP 1.582332e-02
## 2075 BP 1.582332e-02
## 658 BP 1.621124e-02
## 82 BP 1.844816e-02
## 752 BP 1.909703e-02
## 2575 BP 1.996986e-02
## 2416 BP 2.757539e-02
## 596 BP 2.757539e-02
## 3261 BP 2.926835e-02
## 1583 BP 3.017962e-02
## 1139 BP 3.017962e-02
## 2067 BP 3.017962e-02
## 2455 BP 3.243258e-02
## 455 BP 3.336427e-02
## 77 BP 4.740536e-02
## 3062 BP 4.831251e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 359 GO:0006355 3.774479e-15 1.0000000 91
## 1737 GO:0019761 3.841331e-09 1.0000000 11
## 957 GO:0009734 3.980710e-08 1.0000000 21
## 992 GO:0009800 7.273723e-07 1.0000000 5
## 475 GO:0006559 3.410267e-06 0.9999998 6
## 956 GO:0009733 5.299121e-06 0.9999984 22
## 2486 GO:0045892 8.208963e-06 0.9999980 17
## 946 GO:0009699 2.078197e-05 0.9999986 6
## 3086 GO:0071249 2.698712e-05 0.9999981 6
## 793 GO:0009098 5.645881e-05 0.9999967 5
## 740 GO:0007639 1.448675e-04 0.9999976 3
## numInCat term ontology
## 359 2992 regulation of transcription, DNA-templated BP
## 1737 69 glucosinolate biosynthetic process BP
## 957 377 auxin-activated signaling pathway BP
## 992 10 cinnamic acid biosynthetic process BP
## 475 27 L-phenylalanine catabolic process BP
## 956 612 response to auxin BP
## 2486 355 negative regulation of transcription, DNA-templated BP
## 946 34 phenylpropanoid biosynthetic process BP
## 3086 30 cellular response to nitrate BP
## 793 30 leucine biosynthetic process BP
## 740 7 homeostasis of number of meristem cells BP
## over_represented_padjust
## 359 1.430150e-11
## 1737 7.277402e-06
## 957 5.027637e-05
## 992 6.890034e-04
## 475 2.584300e-03
## 956 3.346395e-03
## 2486 4.443394e-03
## 946 9.842862e-03
## 3086 1.136158e-02
## 793 2.139224e-02
## 740 4.990025e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414 8.959736e-12 1.0000000 32
## 594 GO:0006833 1.887099e-10 1.0000000 9
## 359 GO:0006355 3.154466e-09 1.0000000 75
## 739 GO:0007623 1.097169e-06 0.9999998 13
## 1434 GO:0015670 1.786928e-06 1.0000000 3
## 2198 GO:0042128 2.593663e-06 0.9999997 9
## 1195 GO:0010167 3.616853e-06 0.9999997 7
## 2913 GO:0055085 5.429333e-06 0.9999983 24
## 897 GO:0009624 3.801918e-05 0.9999929 11
## 921 GO:0009651 3.971408e-05 0.9999841 30
## 1023 GO:0009853 9.846845e-05 0.9999881 7
## numInCat term ontology
## 859 596 response to water deprivation BP
## 594 34 water transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## 739 181 circadian rhythm BP
## 1434 3 carbon dioxide transport BP
## 2198 81 nitrate assimilation BP
## 1195 53 response to nitrate BP
## 2913 562 transmembrane transport BP
## 897 176 response to nematode BP
## 921 1045 response to salt stress BP
## 1023 104 photorespiration BP
## over_represented_padjust
## 859 3.394844e-08
## 594 3.575108e-07
## 359 3.984091e-06
## 739 1.039293e-03
## 1434 1.354134e-03
## 2198 1.637898e-03
## 1195 1.957751e-03
## 2913 2.571468e-03
## 897 1.504767e-02
## 921 1.504767e-02
## 1023 3.391790e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 914 GO:0009644 1.723508e-13 1.0000000 28
## 2239 GO:0042542 1.376343e-11 1.0000000 28
## 2795 GO:0051259 1.634665e-11 1.0000000 12
## 854 GO:0009408 1.130203e-09 1.0000000 42
## 58 GO:0000302 1.703843e-08 1.0000000 20
## 1270 GO:0010286 5.509471e-08 1.0000000 20
## 359 GO:0006355 1.641735e-07 1.0000000 205
## 744 GO:0008150 5.601514e-07 0.9999998 381
## 2973 GO:0061408 2.022432e-06 0.9999997 12
## 2465 GO:0045736 1.633302e-05 0.9999992 6
## 2088 GO:0034605 1.867923e-05 0.9999951 18
## 678 GO:0007050 3.200089e-05 0.9999981 6
## 2917 GO:0055114 1.058852e-04 0.9999283 133
## 1445 GO:0015706 1.509860e-04 0.9999755 9
## numInCat
## 914 109
## 2239 130
## 2795 22
## 854 305
## 58 96
## 1270 99
## 359 2992
## 744 6445
## 2973 47
## 2465 14
## 2088 117
## 678 15
## 2917 1923
## 1445 39
## term
## 914 response to high light intensity
## 2239 response to hydrogen peroxide
## 2795 protein complex oligomerization
## 854 response to heat
## 58 response to reactive oxygen species
## 1270 heat acclimation
## 359 regulation of transcription, DNA-templated
## 744 biological_process
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 2465 negative regulation of cyclin-dependent protein serine/threonine kinase activity
## 2088 cellular response to heat
## 678 cell cycle arrest
## 2917 oxidation-reduction process
## 1445 nitrate transport
## ontology over_represented_padjust
## 914 BP 6.530372e-10
## 2239 BP 2.064582e-08
## 2795 BP 2.064582e-08
## 854 BP 1.070585e-06
## 58 BP 1.291172e-05
## 1270 BP 3.479231e-05
## 359 BP 8.886477e-05
## 744 BP 2.653017e-04
## 2973 BP 8.514440e-04
## 2465 BP 6.188581e-03
## 2088 BP 6.434146e-03
## 678 BP 1.010428e-02
## 2917 BP 3.086146e-02
## 1445 BP 4.086327e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 862 GO:0009423 1.024617e-07 1.0000000 7
## 1210 GO:0010200 2.693419e-07 0.9999999 21
## 782 GO:0009073 1.634045e-06 0.9999999 7
## 254 GO:0006096 5.125682e-06 0.9999990 14
## 153 GO:0001666 2.146923e-05 0.9999967 10
## 1183 GO:0010150 3.261610e-05 0.9999915 16
## 155 GO:0001678 8.967007e-05 0.9999971 4
## numInCat term ontology
## 862 16 chorismate biosynthetic process BP
## 1210 286 response to chitin BP
## 782 24 aromatic amino acid family biosynthetic process BP
## 254 138 glycolytic process BP
## 153 83 response to hypoxia BP
## 1183 219 leaf senescence BP
## 155 10 cellular glucose homeostasis BP
## over_represented_padjust
## 862 0.0003882275
## 1210 0.0005102682
## 782 0.0020637991
## 254 0.0048553023
## 153 0.0162693858
## 1183 0.0205970672
## 155 0.0485371255
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 663 GO:0007018 5.321387e-14 1.0000000 19
## 688 GO:0007088 1.879943e-09 1.0000000 11
## 1380 GO:0010583 4.325007e-09 1.0000000 11
## 130 GO:0000914 4.412960e-09 1.0000000 6
## 2471 GO:0045787 7.551536e-09 1.0000000 11
## 22 GO:0000079 1.457876e-08 1.0000000 11
## 704 GO:0007142 1.807141e-08 1.0000000 5
## 753 GO:0008284 2.393789e-08 1.0000000 12
## 2860 GO:0051726 3.459615e-07 1.0000000 13
## 50 GO:0000278 6.681427e-07 0.9999999 13
## 343 GO:0006325 1.121667e-06 0.9999999 11
## 2803 GO:0051301 1.864926e-06 0.9999995 21
## 128 GO:0000911 3.477490e-05 0.9999982 5
## 1712 GO:0019538 4.825314e-05 0.9999961 6
## 3277 GO:0080175 5.382854e-05 0.9999995 3
## 1709 GO:0019510 5.840923e-05 0.9999995 3
## 739 GO:0007623 8.636543e-05 0.9999823 11
## 3035 GO:0070828 1.242185e-04 0.9999978 3
## 1996 GO:0033353 1.998934e-04 0.9999963 3
## 636 GO:0006949 2.372055e-04 0.9999878 4
## numInCat
## 663 108
## 688 61
## 1380 67
## 130 10
## 2471 68
## 22 76
## 704 7
## 753 100
## 2860 151
## 50 159
## 343 120
## 2803 419
## 128 26
## 1712 39
## 3277 5
## 1709 5
## 739 181
## 3035 10
## 1996 7
## 636 25
## term
## 663 microtubule-based movement
## 688 regulation of mitotic nuclear division
## 1380 response to cyclopentenone
## 130 phragmoplast assembly
## 2471 positive regulation of cell cycle
## 22 regulation of cyclin-dependent protein serine/threonine kinase activity
## 704 male meiosis II
## 753 positive regulation of cell proliferation
## 2860 regulation of cell cycle
## 50 mitotic cell cycle
## 343 chromatin organization
## 2803 cell division
## 128 cytokinesis by cell plate formation
## 1712 protein metabolic process
## 3277 phragmoplast microtubule organization
## 1709 S-adenosylhomocysteine catabolic process
## 739 circadian rhythm
## 3035 heterochromatin organization
## 1996 S-adenosylmethionine cycle
## 636 syncytium formation
## ontology over_represented_padjust
## 663 BP 2.016273e-10
## 688 BP 3.561553e-06
## 1380 BP 4.180177e-06
## 130 BP 4.180177e-06
## 2471 BP 5.722554e-06
## 22 BP 9.206487e-06
## 704 BP 9.781795e-06
## 753 BP 1.133758e-05
## 2860 BP 1.456498e-04
## 50 BP 2.531593e-04
## 343 BP 3.863632e-04
## 2803 BP 5.888503e-04
## 128 BP 1.013555e-02
## 1712 BP 1.305937e-02
## 3277 BP 1.359709e-02
## 1709 BP 1.383204e-02
## 739 BP 1.924933e-02
## 3035 BP 2.614799e-02
## 1996 BP 3.986295e-02
## 636 BP 4.493858e-02
# Expand the GO_result list-column of `temp` into ordinary rows with unnest(),
# then write the flattened table to a single CSV under ../output.
# The destination is passed positionally on purpose: readr >= 1.4.0 deprecates
# the `path =` argument of write_csv() in favour of `file =`, and the positional
# form is accepted under both names. file.path() mirrors the read_csv() calls
# at the top of this file and yields the same "../output/..." string.
temp %>%
  unnest(GO_result) %>%
  write_csv(file.path("..", "output", "diurnal1314.time.DEG.Kmeans.15cluster.csv"))